# HG changeset patch # Parent af092c1d37478812b242f71e1663e17c5cf12d1a Document, fix and test quoted-printable newline handling * \n by default (e.g. for soft line breaks) * CRLF if found in input (even in non-text) * Native Python implementation in quopri did not handle CRLF * Typo errors in documentation * quopri uses istext=True * header flag does not affect newline encoding; only istext affects it diff -r af092c1d3747 Doc/library/binascii.rst --- a/Doc/library/binascii.rst Sat Jan 17 21:09:16 2015 -0800 +++ b/Doc/library/binascii.rst Sun Jan 18 06:30:09 2015 +0000 @@ -62,7 +62,8 @@ .. function:: a2b_qp(string, header=False) Convert a block of quoted-printable data back to binary and return the binary - data. More than one line may be passed at a time. If the optional argument + data. More than one line may be passed at a time, using either + ``b"\r\n"`` or ``b"\n"`` for newlines. If the optional argument *header* is present and true, underscores will be decoded as spaces. .. versionchanged:: 3.2 @@ -71,15 +72,16 @@ .. function:: b2a_qp(data, quotetabs=False, istext=True, header=False) - Convert binary data to a line(s) of ASCII characters in quoted-printable + Convert binary data to line(s) of ASCII characters in quoted-printable encoding. The return value is the converted line(s). If the optional argument *quotetabs* is present and true, all tabs and spaces will be encoded. If the - optional argument *istext* is present and true, newlines are not encoded but + argument *istext* is true (the default), newlines are not encoded, but trailing whitespace will be encoded. If the optional argument *header* is - present and true, spaces will be encoded as underscores per RFC1522. If the - optional argument *header* is present and false, newline characters will be - encoded as well; otherwise linefeed conversion might corrupt the binary data - stream. + present and true, spaces will be encoded as underscores per RFC1522. 
+ + The return value uses ``b"\n"`` for hard and soft newlines by default, + but will use ``b"\r\n"`` instead if that sequence is found in + the original data (even if *istext* is false). .. function:: a2b_hqx(string) diff -r af092c1d3747 Doc/library/quopri.rst --- a/Doc/library/quopri.rst Sat Jan 17 21:09:16 2015 -0800 +++ b/Doc/library/quopri.rst Sun Jan 18 06:30:09 2015 +0000 @@ -24,24 +24,34 @@ .. function:: decode(input, output, header=False) Decode the contents of the *input* file and write the resulting decoded binary - data to the *output* file. *input* and *output* must be :term:`binary file objects - <file object>`. If the optional argument *header* is present and true, underscore - will be decoded as space. This is used to decode "Q"-encoded headers as + data to the *output* file. The *input* and *output* arguments + must be :term:`binary file objects <file object>`. + If the optional argument *header* is present and true, underscores + will be decoded as spaces. This is used to decode "Q"-encoded headers as described in :rfc:`1522`: "MIME (Multipurpose Internet Mail Extensions) Part Two: Message Header Extensions for Non-ASCII Text". + This function is equivalent to applying :func:`binascii.a2b_qp` to + the file data. + .. function:: encode(input, output, quotetabs, header=False) - Encode the contents of the *input* file and write the resulting quoted- - printable data to the *output* file. *input* and *output* must be - :term:`binary file objects <file object>`. *quotetabs*, a flag which controls - whether to encode embedded spaces and tabs must be provideda and when true it - encodes such embedded whitespace, and when false it leaves them unencoded. + Encode the contents of the *input* file and write the resulting + quoted-printable data to the *output* file. The *input* and + *output* arguments must be :term:`binary file objects <file object>`. + The *quotetabs* flag, which controls whether to encode + embedded spaces and tabs, must be provided. 
When true, it + encodes such embedded whitespace, and when false, it leaves it unencoded. Note that spaces and tabs appearing at the end of lines are always encoded, - as per :rfc:`1521`. *header* is a flag which controls if spaces are encoded + as per :rfc:`1521`. The *header* flag controls if spaces are encoded as underscores as per :rfc:`1522`. + This function is equivalent to applying :func:`binascii.b2a_qp` with + ``istext=True`` to the file data. Therefore, :func:`encode` should only + be used to encode text data that uses ``b"\r\n"`` or ``b"\n"`` as + newlines. + .. function:: decodestring(s, header=False) diff -r af092c1d3747 Lib/quopri.py --- a/Lib/quopri.py Sat Jan 17 21:09:16 2015 -0800 +++ b/Lib/quopri.py Sun Jan 18 06:30:09 2015 +0000 @@ -56,7 +56,7 @@ output.write(odata) return - def write(s, output=output, lineEnd=b'\n'): + def write(s, *, output=output, lineEnd): # RFC 1521 requires that the line ending in a space or tab must have # that trailing character encoded. if s and s[-1:] in b' \t': @@ -71,10 +71,16 @@ line = input.readline() if not line: break + # First, write out the previous line + if prevline is not None: + write(prevline, lineEnd=stripped or b'\n') outline = [] # Strip off any readline induced trailing newline stripped = b'' - if line[-1:] == b'\n': + if line[-2:] == b'\r\n': + line = line[:-2] + stripped = b'\r\n' + elif line[-1:] == b'\n': line = line[:-1] stripped = b'\n' # Calculate the un-length-limited encoded line @@ -86,16 +92,14 @@ outline.append(b'_') else: outline.append(c) - # First, write out the previous line - if prevline is not None: - write(prevline) # Now see if we need any soft line breaks because of RFC-imposed # length limitations. Then do the thisline->prevline dance. thisline = EMPTYSTRING.join(outline) + soft_break = b'=' + (stripped or b'\n') while len(thisline) > MAXLINESIZE: # Don't forget to include the soft line break `=' sign in the # length calculation! 
- write(thisline[:MAXLINESIZE-1], lineEnd=b'=\n') + write(thisline[:MAXLINESIZE-1], lineEnd=soft_break) thisline = thisline[MAXLINESIZE-1:] # Write out the current line prevline = thisline @@ -131,12 +135,17 @@ if not line: break i, n = 0, len(line) if n > 0 and line[n-1:n] == b'\n': - partial = 0; n = n-1 + partial = False + if line.endswith(b'\r\n'): + eol = b'\r\n' + else: + eol = b'\n' + n = n-len(eol) # Strip trailing whitespace while n > 0 and line[n-1:n] in b" \t\r": n = n-1 else: - partial = 1 + partial = True while i < n: c = line[i:i+1] if c == b'_' and header: @@ -144,7 +153,7 @@ elif c != ESCAPE: new = new + c; i = i+1 elif i+1 == n and not partial: - partial = 1; break + partial = True; break elif i+1 < n and line[i+1] == ESCAPE: new = new + ESCAPE; i = i+2 elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]): @@ -152,7 +161,7 @@ else: # Bad escape sequence -- leave it in new = new + c; i = i+1 if not partial: - output.write(new + b'\n') + output.write(new + eol) new = b'' if new: output.write(new) diff -r af092c1d3747 Lib/test/test_binascii.py --- a/Lib/test/test_binascii.py Sat Jan 17 21:09:16 2015 -0800 +++ b/Lib/test/test_binascii.py Sun Jan 18 06:30:09 2015 +0000 @@ -193,6 +193,14 @@ self.assertEqual(binascii.b2a_qp(b'.'), b'=2E') self.assertEqual(binascii.b2a_qp(b'.\n'), b'=2E\n') self.assertEqual(binascii.b2a_qp(b'a.\n'), b'a.\n') + self.assertEqual(binascii.b2a_qp(b'x' * 77, istext=False), + b'x' * 75 + b'=\n' # Non-text mode uses \n by default + b'xx') + self.assertEqual(binascii.b2a_qp(b'x' * 77 + b'\r\n', istext=False), + b'x' * 75 + b'=\r\n' # Switches to CRLF if seen in data + b'xx=0D=0A') + self.assertEqual(binascii.b2a_qp(b'newline\n', header=True), + b'newline\n') def test_empty_string(self): # A test for SF bug #1022953. Make sure SystemError is not raised. 
diff -r af092c1d3747 Lib/test/test_quopri.py --- a/Lib/test/test_quopri.py Sat Jan 17 21:09:16 2015 -0800 +++ b/Lib/test/test_quopri.py Sun Jan 18 06:30:09 2015 +0000 @@ -159,6 +159,42 @@ self.assertEqual(outfp.getvalue(), p) @withpythonimplementation + def test_newline(self): + plain_lines = ( + b'x' * 77 + b'Line 1 \t ', + b'Line 2\t \t', + b'Line 3', + b'No newline', + ) + enc_lines = ( + b'x' * 75 + b'=', + b'xx' b'Line 1 \t=20', + b'Line 2\t =09', + b'Line 3', + b'No newline', + ) + for newline in (b'\r\n', b'\n'): + with self.subTest(repr(newline)): + plaintext = newline.join(plain_lines) + encoded = newline.join(enc_lines) + self.assertEqual(encoded, quopri.encodestring(plaintext)) + self.assertEqual(plaintext, quopri.decodestring(encoded)) + + # Default soft newline is \n if there are no hard newlines + encoded = (b'x' * 75 + b'=\n' + b'xx') + self.assertEqual(encoded, quopri.encodestring(b'x' * 77)) + + @withpythonimplementation + def test_decode_nontext(self): + '''Should decode non-textual =0D and =0A bytes''' + encoded = (b'CR=0D CRLF=0D=0A NL=0A Soft NL=\n' + b'*Soft CRLF=\r\n' + b'*EOF') + data = b'CR\r CRLF\r\n NL\n Soft NL*Soft CRLF*EOF' + self.assertEqual(data, quopri.decodestring(encoded)) + + @withpythonimplementation def test_embedded_ws(self): for p, e in self.ESTRINGS: self.assertEqual(quopri.encodestring(p, quotetabs=True), e)