# HG changeset patch
# Parent af092c1d37478812b242f71e1663e17c5cf12d1a
Document, fix and test quoted-printable newline handling

* Encoded output uses \n by default (e.g. for soft line breaks)
* Encoded output uses CRLF instead if it is found in the input (even for non-text data)
* Fix the native Python implementation in quopri, which did not handle CRLF
* Fix typos in the documentation
* Document that quopri encodes with istext=True
* The header flag does not affect newline encoding; only istext affects it

diff -r af092c1d3747 Doc/library/binascii.rst
--- a/Doc/library/binascii.rst	Sat Jan 17 21:09:16 2015 -0800
+++ b/Doc/library/binascii.rst	Sun Jan 18 06:30:09 2015 +0000
@@ -62,7 +62,8 @@
 .. function:: a2b_qp(string, header=False)
 
    Convert a block of quoted-printable data back to binary and return the binary
-   data. More than one line may be passed at a time. If the optional argument
+   data. More than one line may be passed at a time, using either
+   ``b"\r\n"`` or ``b"\n"`` for newlines. If the optional argument
    *header* is present and true, underscores will be decoded as spaces.
 
    .. versionchanged:: 3.2
@@ -71,15 +72,16 @@
 
 .. function:: b2a_qp(data, quotetabs=False, istext=True, header=False)
 
-   Convert binary data to a line(s) of ASCII characters in quoted-printable
+   Convert binary data to line(s) of ASCII characters in quoted-printable
    encoding.  The return value is the converted line(s). If the optional argument
    *quotetabs* is present and true, all tabs and spaces will be encoded.   If the
-   optional argument *istext* is present and true, newlines are not encoded but
+   argument *istext* is true (the default), newlines are not encoded, but
    trailing whitespace will be encoded. If the optional argument *header* is
-   present and true, spaces will be encoded as underscores per RFC1522. If the
-   optional argument *header* is present and false, newline characters will be
-   encoded as well; otherwise linefeed conversion might corrupt the binary data
-   stream.
+   present and true, spaces will be encoded as underscores per RFC1522.
+
+   The return value uses ``b"\n"`` for hard and soft newlines by default,
+   but will use ``b"\r\n"`` instead if that sequence is found in
+   the original data (even if *istext* is false).
 
 
 .. function:: a2b_hqx(string)
diff -r af092c1d3747 Doc/library/quopri.rst
--- a/Doc/library/quopri.rst	Sat Jan 17 21:09:16 2015 -0800
+++ b/Doc/library/quopri.rst	Sun Jan 18 06:30:09 2015 +0000
@@ -24,24 +24,34 @@
 .. function:: decode(input, output, header=False)
 
    Decode the contents of the *input* file and write the resulting decoded binary
-   data to the *output* file. *input* and *output* must be :term:`binary file objects
-   <file object>`.  If the optional argument *header* is present and true, underscore
-   will be decoded as space. This is used to decode "Q"-encoded headers as
+   data to the *output* file. The *input* and *output* arguments
+   must be :term:`binary file objects <file object>`.
+   If the optional argument *header* is present and true, underscores
+   will be decoded as spaces. This is used to decode "Q"-encoded headers as
    described in :rfc:`1522`: "MIME (Multipurpose Internet Mail Extensions)
    Part Two: Message Header Extensions for Non-ASCII Text".
 
+   This function is equivalent to applying :func:`binascii.a2b_qp` to
+   the file data.
+
 
 .. function:: encode(input, output, quotetabs, header=False)
 
-   Encode the contents of the *input* file and write the resulting quoted-
-   printable data to the *output* file. *input* and *output* must be
-   :term:`binary file objects <file object>`. *quotetabs*, a flag which controls
-   whether to encode embedded spaces and tabs must be provideda and when true it
-   encodes such embedded whitespace, and when false it leaves them unencoded.
+   Encode the contents of the *input* file and write the resulting
+   quoted-printable data to the *output* file. The *input* and
+   *output* arguments must be :term:`binary file objects <file object>`.
+   The *quotetabs* flag, which controls whether to encode
+   embedded spaces and tabs, must be provided. When true, such embedded
+   whitespace is encoded, and when false, it is left unencoded.
    Note that spaces and tabs appearing at the end of lines are always encoded,
-   as per :rfc:`1521`.  *header* is a flag which controls if spaces are encoded
+   as per :rfc:`1521`.  The *header* flag controls whether spaces are encoded
    as underscores as per :rfc:`1522`.
 
+   This function is equivalent to applying :func:`binascii.b2a_qp` with
+   ``istext=True`` to the file data. Therefore, :func:`encode` should only
+   be used to encode text data that uses ``b"\r\n"`` or ``b"\n"`` as
+   newlines.
+
 
 .. function:: decodestring(s, header=False)
 
diff -r af092c1d3747 Lib/quopri.py
--- a/Lib/quopri.py	Sat Jan 17 21:09:16 2015 -0800
+++ b/Lib/quopri.py	Sun Jan 18 06:30:09 2015 +0000
@@ -56,7 +56,7 @@
         output.write(odata)
         return
 
-    def write(s, output=output, lineEnd=b'\n'):
+    def write(s, *, output=output, lineEnd):
         # RFC 1521 requires that the line ending in a space or tab must have
         # that trailing character encoded.
         if s and s[-1:] in b' \t':
@@ -71,10 +71,16 @@
         line = input.readline()
         if not line:
             break
+        # First, write out the previous line
+        if prevline is not None:
+            write(prevline, lineEnd=stripped or b'\n')
         outline = []
         # Strip off any readline induced trailing newline
         stripped = b''
-        if line[-1:] == b'\n':
+        if line[-2:] == b'\r\n':
+            line = line[:-2]
+            stripped = b'\r\n'
+        elif line[-1:] == b'\n':
             line = line[:-1]
             stripped = b'\n'
         # Calculate the un-length-limited encoded line
@@ -86,16 +92,14 @@
                 outline.append(b'_')
             else:
                 outline.append(c)
-        # First, write out the previous line
-        if prevline is not None:
-            write(prevline)
         # Now see if we need any soft line breaks because of RFC-imposed
         # length limitations.  Then do the thisline->prevline dance.
         thisline = EMPTYSTRING.join(outline)
+        soft_break = b'=' + (stripped or b'\n')
         while len(thisline) > MAXLINESIZE:
             # Don't forget to include the soft line break `=' sign in the
             # length calculation!
-            write(thisline[:MAXLINESIZE-1], lineEnd=b'=\n')
+            write(thisline[:MAXLINESIZE-1], lineEnd=soft_break)
             thisline = thisline[MAXLINESIZE-1:]
         # Write out the current line
         prevline = thisline
@@ -131,12 +135,17 @@
         if not line: break
         i, n = 0, len(line)
         if n > 0 and line[n-1:n] == b'\n':
-            partial = 0; n = n-1
+            partial = False
+            if line.endswith(b'\r\n'):
+                eol = b'\r\n'
+            else:
+                eol = b'\n'
+            n = n-len(eol)
             # Strip trailing whitespace
             while n > 0 and line[n-1:n] in b" \t\r":
                 n = n-1
         else:
-            partial = 1
+            partial = True
         while i < n:
             c = line[i:i+1]
             if c == b'_' and header:
@@ -144,7 +153,7 @@
             elif c != ESCAPE:
                 new = new + c; i = i+1
             elif i+1 == n and not partial:
-                partial = 1; break
+                partial = True; break
             elif i+1 < n and line[i+1] == ESCAPE:
                 new = new + ESCAPE; i = i+2
             elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
@@ -152,7 +161,7 @@
             else: # Bad escape sequence -- leave it in
                 new = new + c; i = i+1
         if not partial:
-            output.write(new + b'\n')
+            output.write(new + eol)
             new = b''
     if new:
         output.write(new)
diff -r af092c1d3747 Lib/test/test_binascii.py
--- a/Lib/test/test_binascii.py	Sat Jan 17 21:09:16 2015 -0800
+++ b/Lib/test/test_binascii.py	Sun Jan 18 06:30:09 2015 +0000
@@ -193,6 +193,14 @@
         self.assertEqual(binascii.b2a_qp(b'.'), b'=2E')
         self.assertEqual(binascii.b2a_qp(b'.\n'), b'=2E\n')
         self.assertEqual(binascii.b2a_qp(b'a.\n'), b'a.\n')
+        self.assertEqual(binascii.b2a_qp(b'x' * 77, istext=False),
+            b'x' * 75 + b'=\n'  # Non-text mode uses \n by default
+            b'xx')
+        self.assertEqual(binascii.b2a_qp(b'x' * 77 + b'\r\n', istext=False),
+            b'x' * 75 + b'=\r\n'  # Switches to CRLF if seen in data
+            b'xx=0D=0A')
+        self.assertEqual(binascii.b2a_qp(b'newline\n', header=True),
+            b'newline\n')
 
     def test_empty_string(self):
         # A test for SF bug #1022953.  Make sure SystemError is not raised.
diff -r af092c1d3747 Lib/test/test_quopri.py
--- a/Lib/test/test_quopri.py	Sat Jan 17 21:09:16 2015 -0800
+++ b/Lib/test/test_quopri.py	Sun Jan 18 06:30:09 2015 +0000
@@ -159,6 +159,42 @@
             self.assertEqual(outfp.getvalue(), p)
 
     @withpythonimplementation
+    def test_newline(self):
+        plain_lines = (
+            b'x' * 77 + b'Line 1 \t ',
+            b'Line 2\t \t',
+            b'Line 3',
+            b'No newline',
+        )
+        enc_lines = (
+            b'x' * 75 + b'=',
+            b'xx' b'Line 1 \t=20',
+            b'Line 2\t =09',
+            b'Line 3',
+            b'No newline',
+        )
+        for newline in (b'\r\n', b'\n'):
+            with self.subTest(repr(newline)):
+                plaintext = newline.join(plain_lines)
+                encoded = newline.join(enc_lines)
+                self.assertEqual(encoded, quopri.encodestring(plaintext))
+                self.assertEqual(plaintext, quopri.decodestring(encoded))
+
+        # Default soft newline is \n if there are no hard newlines
+        encoded = (b'x' * 75 + b'=\n'
+            b'xx')
+        self.assertEqual(encoded, quopri.encodestring(b'x' * 77))
+
+    @withpythonimplementation
+    def test_decode_nontext(self):
+        '''Should decode non-textual =0D and =0A bytes'''
+        encoded = (b'CR=0D CRLF=0D=0A NL=0A Soft NL=\n'
+            b'*Soft CRLF=\r\n'
+            b'*EOF')
+        data = b'CR\r CRLF\r\n NL\n Soft NL*Soft CRLF*EOF'
+        self.assertEqual(data, quopri.decodestring(encoded))
+
+    @withpythonimplementation
     def test_embedded_ws(self):
         for p, e in self.ESTRINGS:
             self.assertEqual(quopri.encodestring(p, quotetabs=True), e)