diff -r 64a54f0c87d7 Lib/cgi.py
--- a/Lib/cgi.py	Sat Nov 01 22:48:24 2014 -0500
+++ b/Lib/cgi.py	Tue Nov 04 05:49:42 2014 -0800
@@ -31,7 +31,7 @@
 # Imports
 # =======
 
-from io import StringIO, BytesIO, TextIOWrapper
+from io import StringIO, BytesIO, TextIOWrapper, BufferedReader
 from collections import Mapping
 import sys
 import os
@@ -42,6 +42,7 @@
 import html
 import locale
 import tempfile
+import re
 
 __all__ = ["MiniFieldStorage", "FieldStorage",
            "parse", "parse_qs", "parse_qsl", "parse_multipart",
@@ -359,6 +360,187 @@
         return "MiniFieldStorage(%r, %r)" % (self.name, self.value)
 
 
+class _CgiFp:
+    """ Support class for FieldStorage. This class should not be used directly.
+        fp -> instance of BufferedReader
+        boundary -> multipart boundary *including* the leading '--' delimiter
+
+        All methods advance the file-pointer only if they have determined that
+        the current payload in the buffer (of BufferedReader) does not run over
+        the next boundary in a multipart.
+        There are cases, where multi_read seeks the file pointer beyond the
+        data it has just returned. It does this knowing that the seek never
+        crossesthe next boundary and it mantains the difference in an
+        overlapped buffer. This overlapped buffer is part of the next payload.
+        This is done to resolve cases where a boundary may be between two
+        buffers.
+    """
+
+    def __init__(self, fp, boundary):
+        self._fp = fp
+        self._overlapped = b''
+        self._boundary = boundary
+        # self._eof = Indicates a read has returned an EOF
+        self._eof = False
+        # self._buffer = self._overlapped + self._peek
+        # self._peek = peeked data
+        # create self._bufer and self._peek using _remove_x_keep_buffer
+        self._remove_x_keep_buffer(0)
+
+        self._max_search = len(b'\r\n' + boundary + b'--\r\n')
+        self._initial = True
+
+        # RFC 2616
+        if len(boundary) > 72:
+            raise ValueError("multipart boundary too large")
+
+        self._initial = True
+
+        #Patter for all buffers
+        pattern_noeof = b'\n' + re.escape(boundary) + b'(?:--)?\r?\n'
+        self._re_noeof = re.compile(pattern_noeof)
+
+        #First Buffer
+        #We have a special pattern for the first buffer. The boundary at the
+        #start of the payload will begin without the CRLF.
+        #We should use this pattern only for the first buffer, as the
+        #subsequent buffer's "start of buffer" could be part of the payload,
+        #because we dont test for the leading CRLF/LF, when breaking a buffer.
+        pattern_first_noeof = pattern_noeof \
+            + b'|^' + re.escape(boundary) + b'(?:--)?\r?\n'
+        self._re_first_noeof = re.compile(pattern_first_noeof, re.M)
+
+        #Last Buffer
+        #Similary last buffer can end without a CRLF and is valid only for the
+        #last buffer. Also take care of the corner case where the payload is
+        #just the boundary without a trailing CRLF/LF
+        pattern_eof = pattern_noeof \
+            + b'|\n' + re.escape(boundary) + b'(?:--)?$' \
+            + b'|^' + re.escape(boundary) + b'(?:--)?$'
+        self._re_eof = re.compile(pattern_eof, re.M)
+
+
+    def _keep_x_forward_buffer(self, x):
+        """ Keep the last x bytes from _buffer and drain the read data.
+            This fuction is used when we have determined that self._buffer
+            does not have the next_boundary.
+        """
+        lenbuffer = len(self._buffer)
+        if x > lenbuffer:
+            x = lenbuffer
+        start = lenbuffer - x
+        self._overlapped = self._buffer[start:lenbuffer]
+        if not self._eof:
+            drain = self._fp.read(len(self._peek))
+            if not drain:
+                self._eof = True
+                self._peek = b''
+            else:
+                self._peek = self._fp.peek()
+        self._buffer = self._overlapped + self._peek
+
+    def _remove_x_keep_buffer(self, x):
+        """ Remove the fist x bytes of the buffer.
+            x >= len_of_overlapped
+            This is also called by the constructor
+        """
+        len_of_overlapped = len(self._overlapped)
+        self._overlapped = b''
+        # Note that x >= len_of_overlapped
+        # overlapped is only created when boundary is not found
+        # remove_x is called when boundary is found and x is at
+        # the end of the boundary. Hence x > len_of_overlapped.
+        # x = len_of_overlapped when x = 0
+        data_to_drain = x - len_of_overlapped
+        if not self._eof and data_to_drain:
+            drain = self._fp.read(data_to_drain)
+        self._peek = self._fp.peek()
+        self._buffer = self._overlapped + self._peek
+
+    def _check_boundary_head(self, hindex):
+        ''' Boundary has been located. If boundary is of pattern,\nboundary,
+            claim the leading \r if associated with \n as part of the boundary.
+            hindex = index of start of boundary in buffer. This could be either
+            boundary when found just after initialization or could be
+            \nbuffer.
+            returns index of the boundary including the leading CR.
+        '''
+        if hindex >= 1 and self._buffer[hindex-1 : hindex+1] == b'\r\n':
+            return hindex - 1
+        return hindex
+
+    def _check_boundary_trail(self, tindex):
+        ''' tindex = last element of the boundary.
+
+            returns True if the boundary is a last boundary, false otherwise.
+
+            A boundary could end with CRLF or LF or in case of EOF just the boundary.
+            Test if the boundary ends with --[CRLF|LF] to indicate last boundary.
+        '''
+        if self._buffer[tindex] == b'\n'[0]:
+            if tindex > 0:
+                if self._buffer[tindex - 1] == b'\r'[0]:
+                    if (tindex > 2 and
+                        self._buffer[tindex - 3:tindex - 1] == b'--'):
+                        #Ends as Boundary--CRLF
+                        return True
+                elif tindex > 1  and self._buffer[tindex - 2:tindex] == b'--':
+                    #Ends as Boundary--LF
+                    return True
+        elif (tindex > 0 and self._buffer[tindex-1:tindex + 1] == b'--'):
+            #Ends as Boundary--
+            return True
+        #Ends as Boundary
+        return False
+
+    def multi_read(self):
+        ''' returns chunk, (boundary, islastboundary)
+            boundary or chunk can be empty. When both are empty EOF
+            is reached.
+        '''
+
+        rsearch = None
+        if self._initial:
+            self._initial = False
+            rsearch = self._re_first_noeof.search(self._buffer)
+        else:
+            if self._eof:
+                rsearch = self._re_eof.search(self._buffer)
+            else:
+                rsearch = self._re_noeof.search(self._buffer)
+
+        if rsearch:
+            # boundary is found
+            start_index = self._check_boundary_head(rsearch.start())
+            islastboundary = self._check_boundary_trail(rsearch.end()-1)
+            rdata = ( \
+                    self._buffer[:start_index],\
+                    (self._buffer[start_index:rsearch.end()], islastboundary))
+            self._remove_x_keep_buffer(rsearch.end())
+            return rdata
+        else:
+            if self._eof:
+                rdata = self._buffer, (b'', None)
+                #clear the buffer
+                self._keep_x_forward_buffer(0)
+                return rdata
+            else:
+                chunk = self._buffer[:-self._max_search]
+                self._keep_x_forward_buffer(self._max_search)
+                if chunk:
+                    return chunk, (b'', None)
+                else:
+                    # we have to recurse.
+                    # we recurse because theoretically peek() does not make
+                    # a promise on the length of the buffer except it cannot
+                    # be zero. This recursion will happen in theory if
+                    # after we read the entire data using read,a peek returns
+                    # a buffer of length less than max_search_len.
+                    # The maximum depth of recursion is max_search_len (when
+                    # peek keeps returning us a buffer of length 1)
+                    # max_search_len < 78
+                    return self.multi_read()
+
 class FieldStorage:
 
     """Store a sequence of fields, reading multipart/form-data.
@@ -789,47 +971,67 @@
         """
         next_boundary = b"--" + self.outerboundary
         last_boundary = next_boundary + b"--"
-        delim = b""
-        last_line_lfend = True
         _read = 0
-        while 1:
-            if _read >= self.limit:
-                break
-            line = self.fp.readline(1<<16) # bytes
-            self.bytes_read += len(line)
-            _read += len(line)
-            if not line:
-                self.done = -1
-                break
-            if delim == b"\r":
-                line = delim + line
-                delim = b""
-            if line.startswith(b"--") and last_line_lfend:
-                strippedline = line.rstrip()
-                if strippedline == next_boundary:
+        if isinstance(self.fp, BufferedReader):
+            cgi_fp = _CgiFp(self.fp, next_boundary)
+            while 1:
+                if _read >= self.limit:
                     break
-                if strippedline == last_boundary:
-                    self.done = 1
+                line, (boundary, islastboundary) = cgi_fp.multi_read()
+                self.bytes_read += len(line)
+                self.bytes_read += len(boundary)
+                _read += len(line)
+                _read += len(boundary)
+                if not line and not boundary:
+                    self._done = -1
                     break
-            odelim = delim
-            if line.endswith(b"\r\n"):
-                delim = b"\r\n"
-                line = line[:-2]
-                last_line_lfend = True
-            elif line.endswith(b"\n"):
-                delim = b"\n"
-                line = line[:-1]
-                last_line_lfend = True
-            elif line.endswith(b"\r"):
-                # We may interrupt \r\n sequences if they span the 2**16
-                # byte boundary
-                delim = b"\r"
-                line = line[:-1]
-                last_line_lfend = False
-            else:
-                delim = b""
-                last_line_lfend = False
-            self.__write(odelim + line)
+                if line:
+                    self.__write(line)
+                if boundary:
+                    if islastboundary:
+                        self.done = 1
+                    break
+        else:
+            delim = b""
+            last_line_lfend = True
+            while 1:
+                if _read >= self.limit:
+                    break
+                line = self.fp.readline(1<<16) # bytes
+                self.bytes_read += len(line)
+                _read += len(line)
+                if not line:
+                    self.done = -1
+                    break
+                if delim == b"\r":
+                    line = delim + line
+                    delim = b""
+                if line.startswith(b"--") and last_line_lfend:
+                    strippedline = line.rstrip()
+                    if strippedline == next_boundary:
+                        break
+                    if strippedline == last_boundary:
+                        self.done = 1
+                        break
+                odelim = delim
+                if line.endswith(b"\r\n"):
+                    delim = b"\r\n"
+                    line = line[:-2]
+                    last_line_lfend = True
+                elif line.endswith(b"\n"):
+                    delim = b"\n"
+                    line = line[:-1]
+                    last_line_lfend = True
+                elif line.endswith(b"\r"):
+                    # We may interrupt \r\n sequences if they span the 2**16
+                    # byte boundary
+                    delim = b"\r"
+                    line = line[:-1]
+                    last_line_lfend = False
+                else:
+                    delim = b""
+                    last_line_lfend = False
+                self.__write(odelim + line)
 
     def skip_lines(self):
         """Internal: skip lines until outer boundary if defined."""
diff -r 64a54f0c87d7 Lib/test/test_cgi.py
--- a/Lib/test/test_cgi.py	Sat Nov 01 22:48:24 2014 -0500
+++ b/Lib/test/test_cgi.py	Tue Nov 04 05:49:42 2014 -0800
@@ -3,10 +3,11 @@
 import os
 import sys
 import tempfile
+import random
 import unittest
 import warnings
 from collections import namedtuple
-from io import StringIO, BytesIO
+from io import StringIO, BytesIO, BufferedReader
 
 class HackedSysModule:
     # The regression test will have real values in sys.argv, which
@@ -16,6 +17,10 @@
 
 cgi.sys = HackedSysModule()
 
+def fake_stdins(b, encoding='latin-1'):
+    yield BytesIO(b.encode(encoding))
+    yield BufferedReader(BytesIO(b.encode(encoding)))
+
 class ComparableException:
     def __init__(self, err):
         self.err = err
@@ -106,11 +111,8 @@
 def first_second_elts(list):
     return [(p[0], p[1][0]) for p in list]
 
-def gen_result(data, environ):
-    encoding = 'latin-1'
-    fake_stdin = BytesIO(data.encode(encoding))
-    fake_stdin.seek(0)
-    form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding)
+def gen_result(fake_fp, environ, encoding='latin-1'):
+    form = cgi.FieldStorage(fp=fake_fp, environ=environ, encoding=encoding)
 
     result = {}
     for k, v in dict(form).items():
@@ -118,8 +120,107 @@
 
     return result
 
+def make_str_with_lfs(total_size, plf):
+    # generates a random string
+    # '\n' is filled with a probability of plf
+    nonlf_bytes = [i for i in range(256) if i != b'\n'[0]]
+    ba = bytearray(b'\x00' * total_size)
+    random_size = int(255 / ( 1 - plf)) - 1
+    num_lf = 0
+    for i in range(total_size):
+        r = random.randint(0, random_size)
+        if (r <= 254):
+            ba[i] = nonlf_bytes[r]
+        else:
+            ba[i] = ord('\n')
+            num_lf += 1
+    return ba, num_lf
+
+def write_file_multi(temp_file_name, prologue, epilogue, input_array):
+    with open(temp_file_name, 'wb') as outfile:
+        outfile.write(prologue)
+        outfile.write(input_array)
+        outfile.write(epilogue)
+
+def run_bad_outerboundary_test(testcase, in_byte):
+    prologue = b'''------WebKitFormBoundaryeHCgrOGACrcYuuB5
+Content-Disposition: form-data; name="textline"
+
+form
+------WebKitFormBoundaryeHCgrOGACrcYuuB5
+Content-Disposition: form-data; name="datafile"; filename="somefile.bin"
+Content-Type: application/octet-stream
+
+'''
+    epilogue = b'\r\n'
+
+    write_file_multi(testcase.tempfile, prologue, epilogue, in_byte)
+    size = os.stat(testcase.tempfile).st_size
+    environ = {}
+    environ["CONTENT_TYPE"]   = """\
+multipart/form-data; boundary=----WebKitFormBoundaryeHCgrOGACrcYuuB5"""
+    environ["REQUEST_METHOD"] = "POST"
+    environ["CONTENT_LENGTH"] = str(size)
+    with open(testcase.tempfile,'rb') as fp:
+        fs = cgi.FieldStorage(fp, None, environ=environ)
+        testcase.assertTrue(len(fs['datafile'].value) > len(in_byte))
+
+def run_multi_test(testcase, in_byte, buffer_size,
+                   outer_boundary_head, outer_boundary_tail,
+                   outer_boundary_delimiter):
+
+    prologue = b'''------WebKitFormBoundaryeHCgrOGACrcYuuB5
+Content-Disposition: form-data; name="textline"
+
+form
+------WebKitFormBoundaryeHCgrOGACrcYuuB5
+Content-Disposition: form-data; name="datafile"; filename="somefile.bin"
+Content-Type: application/octet-stream
+
+'''
+    epilogue = outer_boundary_head + \
+               b'------WebKitFormBoundaryeHCgrOGACrcYuuB5' + \
+               outer_boundary_delimiter + outer_boundary_tail
+
+    write_file_multi(testcase.tempfile, prologue, epilogue, in_byte)
+    size = os.stat(testcase.tempfile).st_size
+    environ = {}
+    environ["CONTENT_TYPE"]   = """\
+multipart/form-data; boundary=----WebKitFormBoundaryeHCgrOGACrcYuuB5"""
+    environ["REQUEST_METHOD"] = "POST"
+    environ["CONTENT_LENGTH"] = str(size)
+    testcase.assertEqual(size, len(epilogue) + len(prologue) + len(in_byte))
+    with open(testcase.tempfile,'rb', buffer_size) as fp:
+        fs = cgi.FieldStorage(fp, None, environ=environ)
+        testcase.assertEqual(fs['datafile'].value, in_byte)
+
+def multi_test(testcase, in_byte, buffer_size,
+               outer_boundary_head=b'\r\n', outer_boundary_tail=b'\r\n',
+               outer_boundary_delimiter=b'--'):
+    try:
+        run_multi_test(testcase, in_byte, buffer_size, outer_boundary_head,
+                       outer_boundary_tail, outer_boundary_delimiter)
+    except:
+        print(repr(in_byte))
+        raise
+
+def bad_outerboundary_test(testcase, in_byte):
+    try:
+        run_bad_outerboundary_test(testcase, in_byte)
+    except:
+        print(repr(in_byte))
+        raise
+
 class CgiTests(unittest.TestCase):
 
+    def setUp(self):
+        fd, self.tempfile = tempfile.mkstemp()
+        os.close(fd)
+        self.addCleanup(os.unlink, self.tempfile)
+
+    def tearDown(self):
+        pass
+
     def test_parse_multipart(self):
         fp = BytesIO(POSTDATA.encode('latin1'))
         env = {'boundary': BOUNDARY.encode('latin1'),
@@ -280,8 +381,9 @@
                 'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
                 'REQUEST_METHOD':   'POST',
             }
-            self.assertEqual(gen_result(data, environ),
-                             {'upload': content.encode('latin1')})
+            for fake_stdin in fake_stdins(data):
+                self.assertEqual(gen_result(fake_stdin, environ),
+                                 {'upload': content.encode('latin1')})
         check('x' * (maxline - 1))
         check('x' * (maxline - 1) + '\r')
         check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1))
@@ -321,8 +423,9 @@
             'QUERY_STRING':     'key1=value1&key2=value2y',
             'REQUEST_METHOD':   'POST',
         }
-        v = gen_result(data, environ)
-        self.assertEqual(self._qs_result, v)
+        for fake_stdin in fake_stdins(data):
+            v = gen_result(fake_stdin, environ)
+            self.assertEqual(self._qs_result, v)
 
     def testQSAndFormData(self):
         data = """---123
@@ -345,8 +448,9 @@
             'QUERY_STRING':     'key1=value1&key2=value2x',
             'REQUEST_METHOD':   'POST',
         }
-        v = gen_result(data, environ)
-        self.assertEqual(self._qs_result, v)
+        for fake_stdin in fake_stdins(data):
+            v = gen_result(fake_stdin, environ)
+            self.assertEqual(self._qs_result, v)
 
     def testQSAndFormDataFile(self):
         data = """---123
@@ -379,8 +483,9 @@
         result.update({
             'upload': b'this is the content of the fake file\n'
         })
-        v = gen_result(data, environ)
-        self.assertEqual(result, v)
+        for fake_stdin in fake_stdins(data):
+            v = gen_result(fake_stdin, environ)
+            self.assertEqual(result, v)
 
     def test_deprecated_parse_qs(self):
         # this func is moved to urllib.parse, this is just a sanity check
@@ -425,6 +530,56 @@
             cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
             ("form-data", {"name": "files", "filename": 'fo"o;bar'}))
 
+    def test_end_boundary_overlaps_buffer(self):
+        buffer_size = 1 << 9
+        in_byte_len = buffer_size - 3
+        in_byte, _ = make_str_with_lfs(in_byte_len, 0.3)
+        multi_test(self, in_byte, buffer_size)
+
+    def test_payload_less_than_buffer(self):
+        buffer_size = 1 << 10
+        in_byte_len = 1 << 9
+        in_byte, _ = make_str_with_lfs(in_byte_len, 0.4)
+        multi_test(self, in_byte, buffer_size)
+
+    def test_payload_less_than_buffer_lf(self):
+        buffer_size = 1 << 10
+        in_byte_len = 1 << 9
+        in_byte, _ = make_str_with_lfs(in_byte_len, 0.3)
+        multi_test(self, in_byte, buffer_size,
+                   outer_boundary_head=b'\n', outer_boundary_tail=b'\n')
+
+    def test_payload_bigger_than_buffer(self):
+        buffer_size = 1 << 7
+        in_byte_len = 1 << 9
+        in_byte, _ = make_str_with_lfs(in_byte_len, 0.3)
+        multi_test(self, in_byte, buffer_size)
+
+    def test_no_crlf_after_boundary(self):
+        buffer_size = 1 << 5
+        in_byte_len = 1 << 9
+        in_byte, _ = make_str_with_lfs(in_byte_len, 0.3)
+        multi_test(self, in_byte, buffer_size,
+                   outer_boundary_head=b'\n', outer_boundary_tail=b'',
+                   outer_boundary_delimiter=b'')
+
+    def test_no_crlf_after_outerboundary(self):
+        buffer_size = 1 << 5
+        in_byte_len = 1 << 9
+        in_byte, _ = make_str_with_lfs(in_byte_len, 0.3)
+        multi_test(self, in_byte, buffer_size,
+                   outer_boundary_head=b'\n', outer_boundary_tail=b'')
+
+    def test_very_small_buffer(self):
+        buffer_size = 2
+        in_byte_len = 1 << 5
+        in_byte, _ = make_str_with_lfs(in_byte_len, 0.3)
+        multi_test(self, in_byte, buffer_size)
+
+    def test_badlyformed_multipart(self):
+        in_byte_len = 1 << 9
+        in_byte, _ = make_str_with_lfs(in_byte_len, 0.3)
+        bad_outerboundary_test(self, in_byte)
 
 BOUNDARY = "---------------------------721837373350705526688164684"