diff -r 3e02d70cd07b Doc/library/pprint.rst
--- a/Doc/library/pprint.rst	Thu Apr 18 01:44:27 2013 +0200
+++ b/Doc/library/pprint.rst	Wed Apr 17 21:42:25 2013 -0400
@@ -331,3 +331,14 @@
                         'written in Python',
              'version': '12.3.0'},
     'urls': [{...}, {...}]}
+
+Long bytes objects will only be wrapped on byte boundaries:
+
+   >>> pprint.pprint(b"\n\n\n\n\na\x00", width=1)
+   (b'\n'
+    b'\n'
+    b'\n'
+    b'\n'
+    b'\n'
+    b'a'
+    b'\x00')
diff -r 3e02d70cd07b Lib/pprint.py
--- a/Lib/pprint.py	Thu Apr 18 01:44:27 2013 +0200
+++ b/Lib/pprint.py	Wed Apr 17 21:42:25 2013 -0400
@@ -240,7 +240,7 @@
                 write(endchar)
                 return
 
-            if issubclass(typ, str) and len(object) > 0 and r is str.__repr__:
+            if issubclass(typ, str) and _len(object) > 0 and r is str.__repr__:
                 def _str_parts(s):
                     """
                     Return a list of string literals comprising the repr()
@@ -255,10 +255,10 @@
                             # A list of alternating (non-space, space) strings
                             parts = re.split(r'(\s+)', line) + ['']
                             current = ''
-                            for i in range(0, len(parts), 2):
+                            for i in range(0, _len(parts), 2):
                                 part = parts[i] + parts[i+1]
                                 candidate = current + part
-                                if len(repr(candidate)) > max_width:
+                                if _len(repr(candidate)) > max_width:
                                     if current:
                                         yield repr(current)
                                     current = part
@@ -271,6 +271,41 @@
                         write('\n' + ' '*indent)
                     write(rep)
                 return
+
+            if issubclass(typ, bytes) and _len(object) > 1 and \
+                    r is bytes.__repr__:
+                # 0 or 1 length bytes will always be un-wrapped
+                def _bytes_parts(b):
+                    """Return a list of bytes literals comprising the repr()
+                    of the given bytes using tuple line concatenation."""
+
+                    prefix = ""  # Also acts as the 'is first line' flag
+                    current = b[:1]
+                    prefixed_max_width = max_width - 1
+                    for offset in range(1, _len(b)):
+                        part = b[offset:offset+1]
+                        candidate = current + part
+                        if _len(repr(candidate)) > prefixed_max_width:
+                            # prefix is "(" if this is the first line
+                            prefix = "(" if prefix == "" else " "
+                            yield prefix + repr(current)
+                            current = part
+                        else:
+                            current = candidate
+
+                    # Set suffix and prefix
+                    if prefix == "":
+                        suffix = ""
+                    else:
+                        prefix = " "
+                        suffix = ")"
+                    yield prefix + repr(current) + suffix
+
+                for i, rep in enumerate(_bytes_parts(object)):
+                    if i > 0:
+                        write('\n' + ' '*indent)
+                    write(rep)
+                return
         write(rep)
 
     def _repr(self, object, context, level):
diff -r 3e02d70cd07b Lib/test/test_pprint.py
--- a/Lib/test/test_pprint.py	Thu Apr 18 01:44:27 2013 +0200
+++ b/Lib/test/test_pprint.py	Wed Apr 17 21:42:25 2013 -0400
@@ -513,6 +513,88 @@
             formatted = pprint.pformat(special, width=width)
             self.assertEqual(eval("(" + formatted + ")"), special)
 
+    def test_bytes_wrap_as_tuple(self):
+        # pprint tries to wrap long bytes literals intelligently
+        def _t_raw(formatted, width):
+            def _t_line(line, prefix, suffix, cant_be_zero):
+                if prefix:
+                    self.assertEqual(prefix, line[0])
+                    eval_start = 1
+                else:
+                    eval_start = 0
+
+                if suffix:
+                    self.assertEqual(suffix, line[-1])
+                    eval_end = -1
+                else:
+                    eval_end = len(line)
+
+                eval_line = line[eval_start:eval_end]
+                line_bytes = len(eval(eval_line))
+                if cant_be_zero:
+                    self.assertNotEqual(0, line_bytes)
+                if line_bytes > 1:
+                    self.assertLessEqual(len(line), width, formatted)
+
+            lines = formatted.split("\n")
+            if len(lines) > 1:
+                _t_line(lines[0], prefix="(", suffix=None, cant_be_zero=True)
+                for line in lines[1:-1]:
+                    _t_line(line, prefix=" ", suffix=None, cant_be_zero=True)
+                _t_line(lines[-1], prefix=" ", suffix=")", cant_be_zero=True)
+            else:
+                _t_line(formatted, prefix=None, suffix=None, cant_be_zero=False)
+
+
+        TEST_PARAMS = [
+            (b'', 20, "b''"),
+            (b'\n', 20, "b'\\n'"),
+            (b'a', 20, "b'a'"),
+            (b'aa', 1, "(b'a'\n b'a')"),
+            (b'\x00', 20, "b'\\x00'"),
+            (b'\\', 20, "b'\\\\'"),
+            (b'\n\n\n\n\n\n', 5, """\
+(b'\\n'
+ b'\\n'
+ b'\\n'
+ b'\\n'
+ b'\\n'
+ b'\\n')"""),
+
+            # -- Worst case: one byte per line when width cannot fit two bytes
+            (b'\n\n\n\n\n\n', 1, """\
+(b'\\n'
+ b'\\n'
+ b'\\n'
+ b'\\n'
+ b'\\n'
+ b'\\n')"""),
+            (b'\n\n\n\n\n\n', 1000, "b'\\n\\n\\n\\n\\n\\n'"),
+
+            (b'\x00\xff'*20, 79, """\
+(b'\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff'
+ b'\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff'
+ b'\\x00\\xff\\x00\\xff')"""),
+
+            ({"a": b'\x00\xff'*20, "c": b'\n'}, 79, """\
+{'a': (b'\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff'
+       b'\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff'
+       b'\\x00\\xff\\x00\\xff\\x00\\xff\\x00\\xff'),
+ 'c': b'\\n'}"""),
+            # -- Be sure to exercise the len==1 short circuit
+            ({"a": b"\n", "c": b"\x00"}, 5, """\
+{'a': b'\\n',
+ 'c': b'\\x00'}"""),
+        ]
+
+        for input, width, expected in TEST_PARAMS:
+            with self.subTest(input=input, width=width, expected=expected):
+                formatted = pprint.pformat(input, width=width)
+                if issubclass(type(input), bytes):
+                    # Run _t_raw on plain bytes inputs to check assumptions
+                    _t_raw(formatted, width)
+                self.assertEqual(formatted, expected)
+                self.assertEqual(eval(formatted), input)
 
 class DottedPrettyPrinter(pprint.PrettyPrinter):
 