diff -r 4d00d0109147 Include/bytesobject.h --- a/Include/bytesobject.h Wed Jan 07 00:37:01 2015 +1000 +++ b/Include/bytesobject.h Tue Jan 06 07:31:06 2015 -0800 @@ -56,20 +56,21 @@ PyAPI_FUNC(PyObject *) PyBytes_FromForma PyAPI_FUNC(PyObject *) PyBytes_FromFormat(const char*, ...) Py_GCC_ATTRIBUTE((format(printf, 1, 2))); PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *); PyAPI_FUNC(char *) PyBytes_AsString(PyObject *); PyAPI_FUNC(PyObject *) PyBytes_Repr(PyObject *, int); PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *); PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); #endif +PyAPI_FUNC(PyObject *) PyBytes_Format(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, const char *); /* Macro, trading safety for speed */ #ifndef Py_LIMITED_API #define PyBytes_AS_STRING(op) (assert(PyBytes_Check(op)), \ (((PyBytesObject *)(op))->ob_sval)) #define PyBytes_GET_SIZE(op) (assert(PyBytes_Check(op)),Py_SIZE(op)) #endif diff -r 4d00d0109147 Include/unicodeobject.h --- a/Include/unicodeobject.h Wed Jan 07 00:37:01 2015 +1000 +++ b/Include/unicodeobject.h Tue Jan 06 07:31:06 2015 -0800 @@ -2238,20 +2238,22 @@ PyAPI_FUNC(int) Py_UNICODE_strncmp( PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr( const Py_UNICODE *s, Py_UNICODE c ); PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr( const Py_UNICODE *s, Py_UNICODE c ); +PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); + /* Create a copy of a unicode string ending with a nul character. Return NULL and raise a MemoryError exception on memory allocation failure, otherwise return a new allocated buffer (use PyMem_Free() to free the buffer). 
*/ PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy( PyObject *unicode ); #endif /* Py_LIMITED_API */ #if defined(Py_DEBUG) && !defined(Py_LIMITED_API) diff -r 4d00d0109147 Lib/test/test_format.py --- a/Lib/test/test_format.py Wed Jan 07 00:37:01 2015 +1000 +++ b/Lib/test/test_format.py Tue Jan 06 07:31:06 2015 -0800 @@ -2,21 +2,21 @@ from test.support import verbose, TestFa import locale import sys import test.support as support import unittest maxsize = support.MAX_Py_ssize_t # test string formatting operator (I am not sure if this is being tested # elsewhere but, surely, some of the given cases are *not* tested because # they crash python) -# test on unicode strings as well +# test on bytes object as well def testformat(formatstr, args, output=None, limit=None, overflowok=False): if verbose: if output: print("{!a} % {!a} =? {!a} ...".format(formatstr, args, output), end=' ') else: print("{!a} % {!a} works? ...".format(formatstr, args), end=' ') try: result = formatstr % args @@ -39,195 +39,223 @@ def testformat(formatstr, args, output=N elif output and limit is not None and ( len(result)!=len(output) or result[:limit]!=output[:limit]): if verbose: print('no') print("%s %% %s == %s != %s" % \ (repr(formatstr), repr(args), repr(result), repr(output))) else: if verbose: print('yes') +def testcommon(formatstr, args, output=None, limit=None, overflowok=False): + # if formatstr is a str, test str, bytes, and bytearray; + # otherwise, test bytes and bytearry + if isinstance(formatstr, str): + testformat(formatstr, args, output, limit, overflowok) + b_format = formatstr.encode('ascii') + else: + b_format = formatstr + ba_format = bytearray(b_format) + b_args = [] + if not isinstance(args, tuple): + args = (args, ) + b_args = tuple(args) + if output is None: + b_output = ba_output = None + else: + if isinstance(output, str): + b_output = output.encode('ascii') + else: + b_output = output + ba_output = bytearray(b_output) + testformat(b_format, b_args, b_output, limit, 
overflowok) + # testformat(ba_format, b_args, ba_output, limit, overflowok) + class FormatTest(unittest.TestCase): - def test_format(self): - testformat("%.1d", (1,), "1") - testformat("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow - testformat("%.100d", (1,), '00000000000000000000000000000000000000' + + def test_common_format(self): + # test the format identifiers that work the same across + # str, bytes, and bytearrays (integer, float, oct, hex) + testcommon("%.1d", (1,), "1") + testcommon("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow + testcommon("%.100d", (1,), '00000000000000000000000000000000000000' '000000000000000000000000000000000000000000000000000000' '00000001', overflowok=True) - testformat("%#.117x", (1,), '0x00000000000000000000000000000000000' + testcommon("%#.117x", (1,), '0x00000000000000000000000000000000000' '000000000000000000000000000000000000000000000000000000' '0000000000000000000000000001', overflowok=True) - testformat("%#.118x", (1,), '0x00000000000000000000000000000000000' + testcommon("%#.118x", (1,), '0x00000000000000000000000000000000000' '000000000000000000000000000000000000000000000000000000' '00000000000000000000000000001', overflowok=True) - testformat("%f", (1.0,), "1.000000") + testcommon("%f", (1.0,), "1.000000") # these are trying to test the limits of the internal magic-number-length # formatting buffer, if that number changes then these tests are less # effective - testformat("%#.*g", (109, -1.e+49/3.)) - testformat("%#.*g", (110, -1.e+49/3.)) - testformat("%#.*g", (110, -1.e+100/3.)) + testcommon("%#.*g", (109, -1.e+49/3.)) + testcommon("%#.*g", (110, -1.e+49/3.)) + testcommon("%#.*g", (110, -1.e+100/3.)) # test some ridiculously large precision, expect overflow - testformat('%12.*f', (123456, 1.0)) + testcommon('%12.*f', (123456, 1.0)) # check for internal overflow validation on length of precision # these tests should no longer cause overflow in Python # 2.7/3.1 and later. 
- testformat("%#.*g", (110, -1.e+100/3.)) - testformat("%#.*G", (110, -1.e+100/3.)) - testformat("%#.*f", (110, -1.e+100/3.)) - testformat("%#.*F", (110, -1.e+100/3.)) + testcommon("%#.*g", (110, -1.e+100/3.)) + testcommon("%#.*G", (110, -1.e+100/3.)) + testcommon("%#.*f", (110, -1.e+100/3.)) + testcommon("%#.*F", (110, -1.e+100/3.)) # Formatting of integers. Overflow is not ok - testformat("%x", 10, "a") - testformat("%x", 100000000000, "174876e800") - testformat("%o", 10, "12") - testformat("%o", 100000000000, "1351035564000") - testformat("%d", 10, "10") - testformat("%d", 100000000000, "100000000000") + testcommon("%x", 10, "a") + testcommon("%x", 100000000000, "174876e800") + testcommon("%o", 10, "12") + testcommon("%o", 100000000000, "1351035564000") + testcommon("%d", 10, "10") + testcommon("%d", 100000000000, "100000000000") big = 123456789012345678901234567890 - testformat("%d", big, "123456789012345678901234567890") - testformat("%d", -big, "-123456789012345678901234567890") - testformat("%5d", -big, "-123456789012345678901234567890") - testformat("%31d", -big, "-123456789012345678901234567890") - testformat("%32d", -big, " -123456789012345678901234567890") - testformat("%-32d", -big, "-123456789012345678901234567890 ") - testformat("%032d", -big, "-0123456789012345678901234567890") - testformat("%-032d", -big, "-123456789012345678901234567890 ") - testformat("%034d", -big, "-000123456789012345678901234567890") - testformat("%034d", big, "0000123456789012345678901234567890") - testformat("%0+34d", big, "+000123456789012345678901234567890") - testformat("%+34d", big, " +123456789012345678901234567890") - testformat("%34d", big, " 123456789012345678901234567890") - testformat("%.2d", big, "123456789012345678901234567890") - testformat("%.30d", big, "123456789012345678901234567890") - testformat("%.31d", big, "0123456789012345678901234567890") - testformat("%32.31d", big, " 0123456789012345678901234567890") - testformat("%d", float(big), 
"123456________________________", 6) + testcommon("%d", big, "123456789012345678901234567890") + testcommon("%d", -big, "-123456789012345678901234567890") + testcommon("%5d", -big, "-123456789012345678901234567890") + testcommon("%31d", -big, "-123456789012345678901234567890") + testcommon("%32d", -big, " -123456789012345678901234567890") + testcommon("%-32d", -big, "-123456789012345678901234567890 ") + testcommon("%032d", -big, "-0123456789012345678901234567890") + testcommon("%-032d", -big, "-123456789012345678901234567890 ") + testcommon("%034d", -big, "-000123456789012345678901234567890") + testcommon("%034d", big, "0000123456789012345678901234567890") + testcommon("%0+34d", big, "+000123456789012345678901234567890") + testcommon("%+34d", big, " +123456789012345678901234567890") + testcommon("%34d", big, " 123456789012345678901234567890") + testcommon("%.2d", big, "123456789012345678901234567890") + testcommon("%.30d", big, "123456789012345678901234567890") + testcommon("%.31d", big, "0123456789012345678901234567890") + testcommon("%32.31d", big, " 0123456789012345678901234567890") + testcommon("%d", float(big), "123456________________________", 6) big = 0x1234567890abcdef12345 # 21 hex digits - testformat("%x", big, "1234567890abcdef12345") - testformat("%x", -big, "-1234567890abcdef12345") - testformat("%5x", -big, "-1234567890abcdef12345") - testformat("%22x", -big, "-1234567890abcdef12345") - testformat("%23x", -big, " -1234567890abcdef12345") - testformat("%-23x", -big, "-1234567890abcdef12345 ") - testformat("%023x", -big, "-01234567890abcdef12345") - testformat("%-023x", -big, "-1234567890abcdef12345 ") - testformat("%025x", -big, "-0001234567890abcdef12345") - testformat("%025x", big, "00001234567890abcdef12345") - testformat("%0+25x", big, "+0001234567890abcdef12345") - testformat("%+25x", big, " +1234567890abcdef12345") - testformat("%25x", big, " 1234567890abcdef12345") - testformat("%.2x", big, "1234567890abcdef12345") - testformat("%.21x", big, 
"1234567890abcdef12345") - testformat("%.22x", big, "01234567890abcdef12345") - testformat("%23.22x", big, " 01234567890abcdef12345") - testformat("%-23.22x", big, "01234567890abcdef12345 ") - testformat("%X", big, "1234567890ABCDEF12345") - testformat("%#X", big, "0X1234567890ABCDEF12345") - testformat("%#x", big, "0x1234567890abcdef12345") - testformat("%#x", -big, "-0x1234567890abcdef12345") - testformat("%#.23x", -big, "-0x001234567890abcdef12345") - testformat("%#+.23x", big, "+0x001234567890abcdef12345") - testformat("%# .23x", big, " 0x001234567890abcdef12345") - testformat("%#+.23X", big, "+0X001234567890ABCDEF12345") - testformat("%#-+.23X", big, "+0X001234567890ABCDEF12345") - testformat("%#-+26.23X", big, "+0X001234567890ABCDEF12345") - testformat("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ") - testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345") + testcommon("%x", big, "1234567890abcdef12345") + testcommon("%x", -big, "-1234567890abcdef12345") + testcommon("%5x", -big, "-1234567890abcdef12345") + testcommon("%22x", -big, "-1234567890abcdef12345") + testcommon("%23x", -big, " -1234567890abcdef12345") + testcommon("%-23x", -big, "-1234567890abcdef12345 ") + testcommon("%023x", -big, "-01234567890abcdef12345") + testcommon("%-023x", -big, "-1234567890abcdef12345 ") + testcommon("%025x", -big, "-0001234567890abcdef12345") + testcommon("%025x", big, "00001234567890abcdef12345") + testcommon("%0+25x", big, "+0001234567890abcdef12345") + testcommon("%+25x", big, " +1234567890abcdef12345") + testcommon("%25x", big, " 1234567890abcdef12345") + testcommon("%.2x", big, "1234567890abcdef12345") + testcommon("%.21x", big, "1234567890abcdef12345") + testcommon("%.22x", big, "01234567890abcdef12345") + testcommon("%23.22x", big, " 01234567890abcdef12345") + testcommon("%-23.22x", big, "01234567890abcdef12345 ") + testcommon("%X", big, "1234567890ABCDEF12345") + testcommon("%#X", big, "0X1234567890ABCDEF12345") + testcommon("%#x", big, 
"0x1234567890abcdef12345") + testcommon("%#x", -big, "-0x1234567890abcdef12345") + testcommon("%#.23x", -big, "-0x001234567890abcdef12345") + testcommon("%#+.23x", big, "+0x001234567890abcdef12345") + testcommon("%# .23x", big, " 0x001234567890abcdef12345") + testcommon("%#+.23X", big, "+0X001234567890ABCDEF12345") + testcommon("%#-+.23X", big, "+0X001234567890ABCDEF12345") + testcommon("%#-+26.23X", big, "+0X001234567890ABCDEF12345") + testcommon("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ") + testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345") # next one gets two leading zeroes from precision, and another from the # 0 flag and the width - testformat("%#+027.23X", big, "+0X0001234567890ABCDEF12345") + testcommon("%#+027.23X", big, "+0X0001234567890ABCDEF12345") # same, except no 0 flag - testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345") + testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345") big = 0o12345670123456701234567012345670 # 32 octal digits - testformat("%o", big, "12345670123456701234567012345670") - testformat("%o", -big, "-12345670123456701234567012345670") - testformat("%5o", -big, "-12345670123456701234567012345670") - testformat("%33o", -big, "-12345670123456701234567012345670") - testformat("%34o", -big, " -12345670123456701234567012345670") - testformat("%-34o", -big, "-12345670123456701234567012345670 ") - testformat("%034o", -big, "-012345670123456701234567012345670") - testformat("%-034o", -big, "-12345670123456701234567012345670 ") - testformat("%036o", -big, "-00012345670123456701234567012345670") - testformat("%036o", big, "000012345670123456701234567012345670") - testformat("%0+36o", big, "+00012345670123456701234567012345670") - testformat("%+36o", big, " +12345670123456701234567012345670") - testformat("%36o", big, " 12345670123456701234567012345670") - testformat("%.2o", big, "12345670123456701234567012345670") - testformat("%.32o", big, "12345670123456701234567012345670") - testformat("%.33o", big, 
"012345670123456701234567012345670") - testformat("%34.33o", big, " 012345670123456701234567012345670") - testformat("%-34.33o", big, "012345670123456701234567012345670 ") - testformat("%o", big, "12345670123456701234567012345670") - testformat("%#o", big, "0o12345670123456701234567012345670") - testformat("%#o", -big, "-0o12345670123456701234567012345670") - testformat("%#.34o", -big, "-0o0012345670123456701234567012345670") - testformat("%#+.34o", big, "+0o0012345670123456701234567012345670") - testformat("%# .34o", big, " 0o0012345670123456701234567012345670") - testformat("%#+.34o", big, "+0o0012345670123456701234567012345670") - testformat("%#-+.34o", big, "+0o0012345670123456701234567012345670") - testformat("%#-+37.34o", big, "+0o0012345670123456701234567012345670") - testformat("%#+37.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%o", big, "12345670123456701234567012345670") + testcommon("%o", -big, "-12345670123456701234567012345670") + testcommon("%5o", -big, "-12345670123456701234567012345670") + testcommon("%33o", -big, "-12345670123456701234567012345670") + testcommon("%34o", -big, " -12345670123456701234567012345670") + testcommon("%-34o", -big, "-12345670123456701234567012345670 ") + testcommon("%034o", -big, "-012345670123456701234567012345670") + testcommon("%-034o", -big, "-12345670123456701234567012345670 ") + testcommon("%036o", -big, "-00012345670123456701234567012345670") + testcommon("%036o", big, "000012345670123456701234567012345670") + testcommon("%0+36o", big, "+00012345670123456701234567012345670") + testcommon("%+36o", big, " +12345670123456701234567012345670") + testcommon("%36o", big, " 12345670123456701234567012345670") + testcommon("%.2o", big, "12345670123456701234567012345670") + testcommon("%.32o", big, "12345670123456701234567012345670") + testcommon("%.33o", big, "012345670123456701234567012345670") + testcommon("%34.33o", big, " 012345670123456701234567012345670") + testcommon("%-34.33o", big, 
"012345670123456701234567012345670 ") + testcommon("%o", big, "12345670123456701234567012345670") + testcommon("%#o", big, "0o12345670123456701234567012345670") + testcommon("%#o", -big, "-0o12345670123456701234567012345670") + testcommon("%#.34o", -big, "-0o0012345670123456701234567012345670") + testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%# .34o", big, " 0o0012345670123456701234567012345670") + testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%#-+.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%#-+37.34o", big, "+0o0012345670123456701234567012345670") + testcommon("%#+37.34o", big, "+0o0012345670123456701234567012345670") # next one gets one leading zero from precision - testformat("%.33o", big, "012345670123456701234567012345670") + testcommon("%.33o", big, "012345670123456701234567012345670") # base marker shouldn't change that, since "0" is redundant - testformat("%#.33o", big, "0o012345670123456701234567012345670") + testcommon("%#.33o", big, "0o012345670123456701234567012345670") # but reduce precision, and base marker should add a zero - testformat("%#.32o", big, "0o12345670123456701234567012345670") + testcommon("%#.32o", big, "0o12345670123456701234567012345670") # one leading zero from precision, and another from "0" flag & width - testformat("%034.33o", big, "0012345670123456701234567012345670") + testcommon("%034.33o", big, "0012345670123456701234567012345670") # base marker shouldn't change that - testformat("%0#34.33o", big, "0o012345670123456701234567012345670") + testcommon("%0#34.33o", big, "0o012345670123456701234567012345670") # Some small ints, in both Python int and flavors). 
- testformat("%d", 42, "42") - testformat("%d", -42, "-42") - testformat("%d", 42, "42") - testformat("%d", -42, "-42") - testformat("%d", 42.0, "42") - testformat("%#x", 1, "0x1") - testformat("%#x", 1, "0x1") - testformat("%#X", 1, "0X1") - testformat("%#X", 1, "0X1") - testformat("%#o", 1, "0o1") - testformat("%#o", 1, "0o1") - testformat("%#o", 0, "0o0") - testformat("%#o", 0, "0o0") - testformat("%o", 0, "0") - testformat("%o", 0, "0") - testformat("%d", 0, "0") - testformat("%d", 0, "0") - testformat("%#x", 0, "0x0") - testformat("%#x", 0, "0x0") - testformat("%#X", 0, "0X0") - testformat("%#X", 0, "0X0") - testformat("%x", 0x42, "42") - testformat("%x", -0x42, "-42") - testformat("%x", 0x42, "42") - testformat("%x", -0x42, "-42") - testformat("%o", 0o42, "42") - testformat("%o", -0o42, "-42") - testformat("%o", 0o42, "42") - testformat("%o", -0o42, "-42") + testcommon("%d", 42, "42") + testcommon("%d", -42, "-42") + testcommon("%d", 42, "42") + testcommon("%d", -42, "-42") + testcommon("%d", 42.0, "42") + testcommon("%#x", 1, "0x1") + testcommon("%#x", 1, "0x1") + testcommon("%#X", 1, "0X1") + testcommon("%#X", 1, "0X1") + testcommon("%#o", 1, "0o1") + testcommon("%#o", 1, "0o1") + testcommon("%#o", 0, "0o0") + testcommon("%#o", 0, "0o0") + testcommon("%o", 0, "0") + testcommon("%o", 0, "0") + testcommon("%d", 0, "0") + testcommon("%d", 0, "0") + testcommon("%#x", 0, "0x0") + testcommon("%#x", 0, "0x0") + testcommon("%#X", 0, "0X0") + testcommon("%#X", 0, "0X0") + testcommon("%x", 0x42, "42") + testcommon("%x", -0x42, "-42") + testcommon("%x", 0x42, "42") + testcommon("%x", -0x42, "-42") + testcommon("%o", 0o42, "42") + testcommon("%o", -0o42, "-42") + testcommon("%o", 0o42, "42") + testcommon("%o", -0o42, "-42") + # alternate float formatting + testcommon('%g', 1.1, '1.1') + testcommon('%#g', 1.1, '1.10000') + + def test_str_format(self): testformat("%r", "\u0378", "'\\u0378'") # non printable testformat("%a", "\u0378", "'\\u0378'") # non printable 
testformat("%r", "\u0374", "'\u0374'") # printable testformat("%a", "\u0374", "'\\u0374'") # printable - # alternate float formatting - testformat('%g', 1.1, '1.1') - testformat('%#g', 1.1, '1.10000') - - # Test exception for unknown format characters + # Test exception for unknown format characters, etc. if verbose: print('Testing exceptions') def test_exc(formatstr, args, exception, excmsg): try: testformat(formatstr, args) except exception as exc: if str(exc) == excmsg: if verbose: print("yes") else: @@ -240,22 +268,97 @@ class FormatTest(unittest.TestCase): else: raise TestFailed('did not get expected exception: %s' % excmsg) test_exc('abc %b', 1, ValueError, "unsupported format character 'b' (0x62) at index 5") #test_exc(unicode('abc %\u3000','raw-unicode-escape'), 1, ValueError, # "unsupported format character '?' (0x3000) at index 5") test_exc('%d', '1', TypeError, "%d format: a number is required, not str") test_exc('%g', '1', TypeError, "a float is required") test_exc('no format', '1', TypeError, "not all arguments converted during string formatting") - test_exc('no format', '1', TypeError, - "not all arguments converted during string formatting") + + if maxsize == 2**31-1: + # crashes 2.2.1 and earlier: + try: + "%*d"%(maxsize, -127) + except MemoryError: + pass + else: + raise TestFailed('"%*d"%(maxsize, -127) should fail') + + def test_bytes_and_bytearray_format(self): + # %c will insert a single byte, either from an int in range(256), or + # from a bytes argument of length 1, not from a str. 
+ testcommon(b"%c", 7, b"\x07") + testcommon(b"%c", b"Z", b"Z") + # testcommon(b"%c", bytearray(b"Z"), b"Z") + # %b will insert a series of bytes, either from a type that supports + # the Py_buffer protocol, or something that has a __bytes__ method + class FakeBytes(object): + def __bytes__(self): + return b'123' + fb = FakeBytes() + testcommon(b"%b", b"abc", b"abc") + # testcommon(b"%b", bytearray(b"def"), b"def") + testcommon(b"%b", fb, b"123") + # %s is an alias for %b -- should only be used for Py2/3 code + testcommon(b"%s", b"abc", b"abc") + # testcommon(b"%s", bytearray(b"def"), b"def") + testcommon(b"%s", fb, b"123") + # %a will give the equivalent of + # repr(some_obj).encode('ascii', 'backslashreplace') + testcommon(b"%a", 3.14, b"3.14") + testcommon(b"%a", b"ghi", b"b'ghi'") + testcommon(b"%a", "jkl", b"'jkl'") + testcommon(b"%a", "\u0544", b"'\\u0544'") + + # Test exception for unknown format characters, etc. + if verbose: + print('Testing exceptions') + def test_exc(formatstr, args, exception, excmsg): + try: + testformat(formatstr, args) + except exception as exc: + if str(exc) == excmsg: + if verbose: + print("yes") + else: + if verbose: print('no') + print('Unexpected ', exception, ':', repr(str(exc))) + except: + if verbose: print('no') + print('Unexpected exception') + raise + else: + raise TestFailed('did not get expected exception: %s' % excmsg) + test_exc(b'%d', '1', TypeError, + "%d format: a number is required, not str") + test_exc(b'%d', b'1', TypeError, + "%d format: a number is required, not bytes") + test_exc(b'%g', '1', TypeError, "float argument required, not str") + test_exc(b'%g', b'1', TypeError, "float argument required, not bytes") + test_exc(b'no format', 7, TypeError, + "not all arguments converted during bytes formatting") + test_exc(b'no format', b'1', TypeError, + "not all arguments converted during bytes formatting") + test_exc(b'no format', bytearray(b'1'), TypeError, + "not all arguments converted during bytes formatting") + 
test_exc(b"%c", 256, TypeError, + "%c requires an integer in range(256) or a single byte") + test_exc(b"%c", b"Za", TypeError, + "%c requires an integer in range(256) or a single byte") + test_exc(b"%c", "Yb", TypeError, + "%c requires an integer in range(256) or a single byte") + test_exc(b"%b", "Xc", TypeError, + "%b requires bytes, or an object that implements __bytes__, not 'str'") + test_exc(b"%s", "Wd", TypeError, + "%b requires bytes, or an object that implements __bytes__, not 'str'") if maxsize == 2**31-1: # crashes 2.2.1 and earlier: try: "%*d"%(maxsize, -127) except MemoryError: pass else: raise TestFailed('"%*d"%(maxsize, -127) should fail') diff -r 4d00d0109147 Objects/abstract.c --- a/Objects/abstract.c Wed Jan 07 00:37:01 2015 +1000 +++ b/Objects/abstract.c Tue Jan 06 07:31:06 2015 -0800 @@ -679,22 +679,23 @@ PyObject_Format(PyObject *obj, PyObject "Type %.100s doesn't define __format__", Py_TYPE(obj)->tp_name); goto done; } /* And call it. */ result = PyObject_CallFunctionObjArgs(meth, format_spec, NULL); Py_DECREF(meth); if (result && !PyUnicode_Check(result)) { - PyErr_SetString(PyExc_TypeError, - "__format__ method did not return string"); + PyErr_Format(PyExc_TypeError, + "__format__ must return a str, not %.200s", + Py_TYPE(result)->tp_name); Py_DECREF(result); result = NULL; goto done; } done: Py_XDECREF(empty); return result; } /* Operations on numbers */ diff -r 4d00d0109147 Objects/bytesobject.c --- a/Objects/bytesobject.c Wed Jan 07 00:37:01 2015 +1000 +++ b/Objects/bytesobject.c Tue Jan 06 07:31:06 2015 -0800 @@ -391,22 +391,630 @@ PyBytes_FromFormat(const char *format, . 
va_list vargs; #ifdef HAVE_STDARG_PROTOTYPES va_start(vargs, format); #else va_start(vargs); #endif ret = PyBytes_FromFormatV(format, vargs); va_end(vargs); return ret;
+}
+
+/* Helpers for formatstring */
+
+/* Fetch the next argument for a conversion specifier.
+ *
+ * arglen >= 0 means args is a tuple holding that many items; arglen < 0
+ * means args is a single object, which is handed out as-is.  *p_argidx is
+ * the cursor and is advanced on success.  (PyBytes_Format encodes the
+ * single-object case as arglen == -1 with the cursor starting at -2, so
+ * that object can be consumed exactly once.)
+ *
+ * Returns a borrowed reference, or NULL with TypeError set when the
+ * arguments are exhausted.
+ */
+Py_LOCAL_INLINE(PyObject *)
+getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
+{
+    Py_ssize_t argidx = *p_argidx;
+    if (argidx < arglen) {
+        (*p_argidx)++;
+        if (arglen < 0)
+            return args;
+        else
+            return PyTuple_GetItem(args, argidx);
+    }
+    PyErr_SetString(PyExc_TypeError,
+                    "not enough arguments for format string");
+    return NULL;
 }

+/* Format codes
+ * F_LJUST      '-'
+ * F_SIGN       '+'
+ * F_BLANK      ' '
+ * F_ALT        '#'
+ * F_ZERO       '0'
+ */
+#define F_LJUST (1<<0)
+#define F_SIGN  (1<<1)
+#define F_BLANK (1<<2)
+#define F_ALT   (1<<3)
+#define F_ZERO  (1<<4)
+
+/* Format a float for the e, E, f, F, g and G conversions.
+ *
+ *      v       object to convert; it is passed through PyFloat_AsDouble()
+ *      flags   bitmask of format flags; only F_ALT is looked at
+ *      prec    precision; a negative value selects the default of 6
+ *      type    conversion character, handed to PyOS_double_to_string()
+ *
+ * Returns a new reference to a PyBytes object, or NULL on failure.
+ */
+
+static PyObject *
+formatfloat(PyObject *v, int flags, int prec, int type)
+{
+    char *p;
+    PyObject *result;
+    double x;
+
+    x = PyFloat_AsDouble(v);
+    if (x == -1.0 && PyErr_Occurred()) {
+        /* Only reword the "wrong type" failure.  Any other exception
+         * (for example an OverflowError while converting a huge int)
+         * must reach the caller unchanged instead of being masked as
+         * a TypeError. */
+        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+            PyErr_Format(PyExc_TypeError, "float argument required, "
+                         "not %.200s", Py_TYPE(v)->tp_name);
+        }
+        return NULL;
+    }
+
+    if (prec < 0)
+        prec = 6;
+
+    p = PyOS_double_to_string(x, type, prec,
+                              (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
+
+    if (p == NULL)
+        return NULL;
+    result = PyBytes_FromStringAndSize(p, strlen(p));
+    PyMem_Free(p);
+    return result;
+}
+
+/* format_long emulates the format codes d, u, o, x and X, and the
+ * F_ALT flag, for Python ints.  The digits are produced by
+ * _PyUnicode_FormatLong() and then encoded to ASCII bytes.
+ *
+ * Return value:  a new PyBytes*, or NULL if error.
+ *     *pbuf is set to point into it,
+ *     *plen set to the # of chars following that.
+ *     Caller must decref it when done using pbuf.
+ *     The string starting at *pbuf begins with an optional "-", then an
+ *     optional base marker (present only with F_ALT set in flags),
+ *     then the digits.  There will be at least prec digits, zero-filled
+ *     on the left if necessary to get that many.
+ *
+ *      val     object to be converted
+ *      flags   bitmask of format flags; only F_ALT is looked at
+ *      prec    minimum number of digits; 0-fill on left if needed
+ *      type    a character in [duoxX]; u acts the same as d
+ *
+ * Because *plen is an int, a result longer than INT_MAX bytes is
+ * rejected with OverflowError rather than silently truncated.
+ */
+
+static PyObject *
+format_long(PyObject *val, int flags, int prec, int type,
+            char **pbuf, int *plen)
+{
+    PyObject *s;
+    PyObject *result = NULL;
+    Py_ssize_t size;
+
+    s = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type);
+    if (!s)
+        return NULL;
+    result = _PyUnicode_AsASCIIString(s, "strict");
+    Py_DECREF(s);
+    if (!result)
+        return NULL;
+    size = PyBytes_GET_SIZE(result);
+    if (size > INT_MAX) {
+        Py_DECREF(result);
+        PyErr_SetString(PyExc_OverflowError,
+                        "formatted integer is too long");
+        return NULL;
+    }
+    *pbuf = PyBytes_AS_STRING(result);
+    *plen = (int)size;
+    return result;
+}
+
+/* Format the argument of a %c conversion into buf.
+ *
+ * v is either a bytes object (parsed with the "c" PyArg_Parse code) or
+ * something PyLong_AsLong() accepts with a value in range(256).  One
+ * data byte and a terminating NUL are stored, so buflen must be >= 2.
+ *
+ * Returns 1 (the number of data bytes written), or -1 with an
+ * exception set.
+ */
+Py_LOCAL_INLINE(int)
+formatchar(char *buf, size_t buflen, PyObject *v)
+{
+    /* the buffer must hold the byte plus the trailing NUL */
+    assert(buflen >= 2);
+    if (PyBytes_Check(v)) {
+        if (!PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0]))
+            return -1;
+    }
+    else {
+        long ival = PyLong_AsLong(v);
+        /* Conversion failures and out-of-range values share one message. */
+        if ((ival == -1 && PyErr_Occurred()) || ival < 0 || ival > 255) {
+            PyErr_SetString(PyExc_TypeError,
+                "%c requires an integer in range(256) or a single byte");
+            return -1;
+        }
+        buf[0] = (char)ival;
+    }
+    buf[1] = '\0';
+    return 1;
+}
+
+/* Convert v to bytes for the %b conversion.
+ *
+ * A bytes object (PyBytes_Check) is returned as-is with a new reference.
+ * Otherwise the __bytes__ special method is looked up and called; it
+ * must return a bytes object.
+ *
+ * Returns a new reference to a PyBytes object, or NULL with an
+ * exception set.
+ */
+static PyObject *
+format_obj(PyObject *v)
+{
+    PyObject *result = NULL;
+    PyObject *func;
+    _Py_IDENTIFIER(__bytes__);
+
+    /* is it a bytes object? */
+    if (PyBytes_Check(v)) {
+        result = v;
+        Py_INCREF(v);
+        return result;
+    }
+    /* does it support __bytes__? */
+    func = _PyObject_LookupSpecial(v, &PyId___bytes__);
+    if (func != NULL) {
+        result = PyObject_CallFunctionObjArgs(func, NULL);
+        Py_DECREF(func);
+        if (result == NULL)
+            return NULL;
+        if (!PyBytes_Check(result)) {
+            PyErr_Format(PyExc_TypeError,
+                         "__bytes__ returned non-bytes (type %.200s)",
+                         Py_TYPE(result)->tp_name);
+            Py_DECREF(result);
+            return NULL;
+        }
+        return result;
+    }
+    /* A NULL lookup result with an exception pending means the lookup
+     * itself failed; keep that exception instead of replacing it with
+     * the generic "not supported" TypeError below. */
+    if (PyErr_Occurred())
+        return NULL;
+    PyErr_Format(PyExc_TypeError,
+                 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
+                 Py_TYPE(v)->tp_name);
+    return NULL;
+}
+
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) + + FORMATBUFLEN is the length of the buffer in which the ints & + chars are formatted. XXX This is a magic number. Each formatting + routine does bounds checking to ensure no overflow, but a better + solution may be to malloc a buffer of appropriate size for each + format. For now, the current solution is sufficient. +*/ +#define FORMATBUFLEN (size_t)120 + +PyObject * +PyBytes_Format(PyObject *format, PyObject *args) +{ + char *fmt, *res; + Py_ssize_t arglen, argidx; + Py_ssize_t reslen, rescnt, fmtcnt; + int args_owned = 0; + PyObject *result; + PyObject *repr; + PyObject *dict = NULL; + if (format == NULL || !PyBytes_Check(format) || args == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + fmt = PyBytes_AS_STRING(format); + fmtcnt = PyBytes_GET_SIZE(format); + reslen = rescnt = fmtcnt + 100; + result = PyBytes_FromStringAndSize((char *)NULL, reslen); + if (result == NULL) + return NULL; + res = PyBytes_AsString(result); + if (PyTuple_Check(args)) { + arglen = PyTuple_GET_SIZE(args); + argidx = 0; + } + else { + arglen = -1; + argidx = -2; + } + if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript && + !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) && + !PyByteArray_Check(args)) { + dict = args; + } + while (--fmtcnt >= 0) { + if (*fmt != '%') { + if (--rescnt < 0) { + rescnt = fmtcnt + 100; + reslen += 
rescnt; + if (_PyBytes_Resize(&result, reslen)) + return NULL; + res = PyBytes_AS_STRING(result) + + reslen - rescnt; + --rescnt; + } + *res++ = *fmt++; + } + else { + /* Got a format specifier */ + int flags = 0; + Py_ssize_t width = -1; + int prec = -1; + int c = '\0'; + int fill; + int isnumok; + PyObject *v = NULL; + PyObject *temp = NULL; + Py_buffer buf; + char *pbuf; + int sign; + Py_ssize_t len; + char formatbuf[FORMATBUFLEN]; + /* For format{int,char}() */ + + buf.obj = NULL; + fmt++; + if (*fmt == '(') { + char *keystart; + Py_ssize_t keylen; + PyObject *key; + int pcount = 1; + + if (dict == NULL) { + PyErr_SetString(PyExc_TypeError, + "format requires a mapping"); + goto error; + } + ++fmt; + --fmtcnt; + keystart = fmt; + /* Skip over balanced parentheses */ + while (pcount > 0 && --fmtcnt >= 0) { + if (*fmt == ')') + --pcount; + else if (*fmt == '(') + ++pcount; + fmt++; + } + keylen = fmt - keystart - 1; + if (fmtcnt < 0 || pcount > 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format key"); + goto error; + } + key = PyBytes_FromStringAndSize(keystart, + keylen); + if (key == NULL) + goto error; + if (args_owned) { + Py_DECREF(args); + args_owned = 0; + } + args = PyObject_GetItem(dict, key); + Py_DECREF(key); + if (args == NULL) { + goto error; + } + args_owned = 1; + arglen = -1; + argidx = -2; + } + while (--fmtcnt >= 0) { + switch (c = *fmt++) { + case '-': flags |= F_LJUST; continue; + case '+': flags |= F_SIGN; continue; + case ' ': flags |= F_BLANK; continue; + case '#': flags |= F_ALT; continue; + case '0': flags |= F_ZERO; continue; + } + break; + } + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + goto error; + } + width = PyLong_AsSsize_t(v); + if (width == -1 && PyErr_Occurred()) + goto error; + if (width < 0) { + flags |= F_LJUST; + width = -width; + } + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && 
isdigit(c)) { + width = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "width too big"); + goto error; + } + width = width*10 + (c - '0'); + } + } + if (c == '.') { + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + if (!PyLong_Check(v)) { + PyErr_SetString( + PyExc_TypeError, + "* wants int"); + goto error; + } + prec = PyLong_AsSsize_t(v); + if (prec == -1 && PyErr_Occurred()) + goto error; + if (prec < 0) + prec = 0; + if (--fmtcnt >= 0) + c = *fmt++; + } + else if (c >= 0 && isdigit(c)) { + prec = c - '0'; + while (--fmtcnt >= 0) { + c = Py_CHARMASK(*fmt++); + if (!isdigit(c)) + break; + if (prec > (INT_MAX - ((int)c - '0')) / 10) { + PyErr_SetString( + PyExc_ValueError, + "prec too big"); + goto error; + } + prec = prec*10 + (c - '0'); + } + } + } /* prec */ + if (fmtcnt >= 0) { + if (c == 'h' || c == 'l' || c == 'L') { + if (--fmtcnt >= 0) + c = *fmt++; + } + } + if (fmtcnt < 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format"); + goto error; + } + if (c != '%') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto error; + } + sign = 0; + fill = ' '; + switch (c) { + case '%': + pbuf = "%"; + len = 1; + break; + case 'a': + temp = PyObject_Repr(v); + if (temp == NULL) + goto error; + repr = PyUnicode_AsEncodedObject(temp, "ascii", "backslashreplace"); + if (repr == NULL) { + Py_DECREF(temp); + goto error; + } + if (_getbuffer(repr, &buf) < 0) { + temp = format_obj(repr); + if (temp == NULL) { + Py_DECREF(repr); + goto error; + } + Py_DECREF(repr); + repr = temp; + } + pbuf = PyBytes_AS_STRING(repr); + len = PyBytes_GET_SIZE(repr); + Py_DECREF(repr); + if (prec >= 0 && len > prec) + len = prec; + break; + case 's': + // %s is only for 2/3 code; 3 only code should use %b + case 'b': + temp = format_obj(v); + if 
(temp == NULL) + goto error; + pbuf = PyBytes_AS_STRING(temp); + len = PyBytes_GET_SIZE(temp); + if (prec >= 0 && len > prec) + len = prec; + break; + case 'i': + case 'd': + case 'u': + case 'o': + case 'x': + case 'X': + if (c == 'i') + c = 'd'; + isnumok = 0; + if (PyNumber_Check(v)) { + PyObject *iobj=NULL; + + if ((PyLong_Check(v))) { + iobj = v; + Py_INCREF(iobj); + } + else { + iobj = PyNumber_Long(v); + } + if (iobj!=NULL) { + if (PyLong_Check(iobj)) { + int ilen; + + isnumok = 1; + temp = format_long(iobj, flags, + prec, c, &pbuf, &ilen); + Py_DECREF(iobj); + len = ilen; + if (!temp) + goto error; + sign = 1; + } + else { + Py_DECREF(iobj); + } + } + } + if (!isnumok) { + PyErr_Format(PyExc_TypeError, + "%%%c format: a number is required, " + "not %.200s", c, Py_TYPE(v)->tp_name); + goto error; + } + if (flags & F_ZERO) + fill = '0'; + break; + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + temp = formatfloat(v, flags, prec, c); + if (temp == NULL) + goto error; + pbuf = PyBytes_AS_STRING(temp); + len = PyBytes_GET_SIZE(temp); + sign = 1; + if (flags & F_ZERO) + fill = '0'; + break; + case 'c': + pbuf = formatbuf; + len = formatchar(pbuf, sizeof(formatbuf), v); + if (len < 0) + goto error; + break; + default: + PyErr_Format(PyExc_ValueError, + "unsupported format character '%c' (0x%x) " + "at index %zd", + c, c, + (Py_ssize_t)(fmt - 1 - + PyBytes_AsString(format))); + goto error; + } + if (sign) { + if (*pbuf == '-' || *pbuf == '+') { + sign = *pbuf++; + len--; + } + else if (flags & F_SIGN) + sign = '+'; + else if (flags & F_BLANK) + sign = ' '; + else + sign = 0; + } + if (width < len) + width = len; + if (rescnt - (sign != 0) < width) { + reslen -= rescnt; + rescnt = width + fmtcnt + 100; + reslen += rescnt; + if (reslen < 0) { + Py_DECREF(result); + PyBuffer_Release(&buf); + Py_XDECREF(temp); + return PyErr_NoMemory(); + } + if (_PyBytes_Resize(&result, reslen)) { + PyBuffer_Release(&buf); + Py_XDECREF(temp); + return NULL; + } 
+ res = PyBytes_AS_STRING(result) + + reslen - rescnt; + } + if (sign) { + if (fill != ' ') + *res++ = sign; + rescnt--; + if (width > len) + width--; + } + if ((flags & F_ALT) && (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + if (fill != ' ') { + *res++ = *pbuf++; + *res++ = *pbuf++; + } + rescnt -= 2; + width -= 2; + if (width < 0) + width = 0; + len -= 2; + } + if (width > len && !(flags & F_LJUST)) { + do { + --rescnt; + *res++ = fill; + } while (--width > len); + } + if (fill == ' ') { + if (sign) + *res++ = sign; + if ((flags & F_ALT) && + (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + *res++ = *pbuf++; + *res++ = *pbuf++; + } + } + Py_MEMCPY(res, pbuf, len); + res += len; + rescnt -= len; + while (--width >= len) { + --rescnt; + *res++ = ' '; + } + if (dict && (argidx < arglen) && c != '%') { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + PyBuffer_Release(&buf); + Py_XDECREF(temp); + goto error; + } + PyBuffer_Release(&buf); + Py_XDECREF(temp); + } /* '%' */ + } /* until end */ + if (argidx < arglen && !dict) { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during bytes formatting"); + goto error; + } + if (args_owned) { + Py_DECREF(args); + } + if (_PyBytes_Resize(&result, reslen - rescnt)) + return NULL; + return result; + + error: + Py_DECREF(result); + if (args_owned) { + Py_DECREF(args); + } + return NULL; +} +/* =-= */ + + static void bytes_dealloc(PyObject *op) { Py_TYPE(op)->tp_free(op); } /* Unescape a backslash-escaped string. If unicode is non-zero, the string is a u-literal. If recode_encoding is non-zero, the string is UTF-8 encoded and should be re-encoded in the specified encoding. 
*/ @@ -2989,20 +3597,35 @@ bytes_methods[] = { _Py_swapcase__doc__}, {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__}, BYTES_TRANSLATE_METHODDEF {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__}, {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__}, BYTES_SIZEOF_METHODDEF {NULL, NULL} /* sentinel */ }; static PyObject * +bytes_mod(PyObject *v, PyObject *w) +{ + /* Implements the % operator for bytes: returns NotImplemented unless v passes PyBytes_Check, otherwise delegates to PyBytes_Format(v, w). */ if (!PyBytes_Check(v)) + Py_RETURN_NOTIMPLEMENTED; + return PyBytes_Format(v, w); +} + +/* Number-protocol table for bytes; only the nb_remainder slot is filled (with bytes_mod), the listed slots before it are 0. */ static PyNumberMethods bytes_as_number = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + bytes_mod, /*nb_remainder*/ +}; + +static PyObject * str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static PyObject * bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *x = NULL; const char *encoding = NULL; const char *errors = NULL; PyObject *new = NULL; PyObject *func; @@ -3279,21 +3902,21 @@ PyTypeObject PyBytes_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "bytes", PyBytesObject_SIZE, sizeof(char), bytes_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_reserved */ (reprfunc)bytes_repr, /* tp_repr */ - 0, /* tp_as_number */ + &bytes_as_number, /* tp_as_number */ &bytes_as_sequence, /* tp_as_sequence */ &bytes_as_mapping, /* tp_as_mapping */ (hashfunc)bytes_hash, /* tp_hash */ 0, /* tp_call */ bytes_str, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ &bytes_as_buffer, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */ diff -r 4d00d0109147 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Wed Jan 07 00:37:01 2015 +1000 +++ b/Objects/unicodeobject.c Tue Jan 06 07:31:06 2015 -0800 @@ -13886,33 +13886,31 @@ formatfloat(PyObject *v, struct unicode_ * There will be at least prec digits, zero-filled on the left if * necessary to get that many.
* val object to be converted * flags bitmask of format flags; only F_ALT is looked at * prec minimum number of digits; 0-fill on left if needed * type a character in [duoxX]; u acts the same as d * * CAUTION: o, x and X conversions on regular ints can never * produce a '-' sign, but can for Python's unbounded ints. */ -static PyObject* -formatlong(PyObject *val, struct unicode_format_arg_t *arg) +PyObject * +_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type) { PyObject *result = NULL; char *buf; Py_ssize_t i; int sign; /* 1 if '-', else 0 */ int len; /* number of characters */ Py_ssize_t llen; int numdigits; /* len == numnondigits + numdigits */ int numnondigits = 0; - int prec = arg->prec; - int type = arg->ch; /* Avoid exceeding SSIZE_T_MAX */ if (prec > INT_MAX-3) { PyErr_SetString(PyExc_OverflowError, "precision too large"); return NULL; } assert(PyLong_Check(val)); @@ -13947,31 +13945,31 @@ formatlong(PyObject *val, struct unicode if (Py_REFCNT(result) != 1) { Py_DECREF(result); PyErr_BadInternalCall(); return NULL; } buf = PyUnicode_DATA(result); llen = PyUnicode_GET_LENGTH(result); if (llen > INT_MAX) { Py_DECREF(result); PyErr_SetString(PyExc_ValueError, - "string too large in _PyBytes_FormatLong"); + "string too large in _PyUnicode_FormatLong"); return NULL; } len = (int)llen; sign = buf[0] == '-'; numnondigits += sign; numdigits = len - numnondigits; assert(numdigits > 0); /* Get rid of base marker unless F_ALT */ - if (((arg->flags & F_ALT) == 0 && + if (((alt) == 0 && (type == 'o' || type == 'x' || type == 'X'))) { assert(buf[sign] == '0'); assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || buf[sign+1] == 'o'); numnondigits -= 2; buf += 2; len -= 2; if (sign) buf[0] = '-'; assert(len == numnondigits + numdigits); @@ -14092,21 +14090,21 @@ mainformatlong(PyObject *v, } if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) { Py_DECREF(iobj); return -1; } Py_DECREF(iobj); return 1; } - res = formatlong(iobj, arg); + res = 
_PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type); Py_DECREF(iobj); if (res == NULL) return -1; *p_output = res; return 0; wrongtype: switch(type) { case 'o':