Index: Lib/zipfile.py
===================================================================
--- Lib/zipfile.py	(revision 74552)
+++ Lib/zipfile.py	(working copy)
@@ -482,6 +482,7 @@
 
     def set_univ_newlines(self, univ_newlines):
         self.univ_newlines = univ_newlines
+        self.newlines = None
 
         # pick line separator char(s) based on universal newlines flag
         self.nlSeps = ("\n", )
@@ -504,11 +505,6 @@
     def _checkfornewline(self):
         nl, nllen = -1, -1
         if self.linebuffer:
-            # ugly check for cases where half of an \r\n pair was
-            # read on the last pass, and the \r was discarded. In this
-            # case we just throw away the \n at the start of the buffer.
-            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
-                self.linebuffer = self.linebuffer[1:]
 
             for sep in self.nlSeps:
                 nl = self.linebuffer.find(sep)
@@ -553,9 +549,7 @@
                 s = self.linebuffer
                 self.linebuffer = ''
                 return s
-
         buf = self.linebuffer[:nl]
-        self.lastdiscard = self.linebuffer[nl:nl + nllen]
         self.linebuffer = self.linebuffer[nl + nllen:]
 
         # line is always returned with \n as newline char (except possibly
@@ -589,9 +583,13 @@
         if size is not None and size >= 0:
             if self.compress_type == ZIP_STORED:
                 lr = len(self.readbuffer)
-                bytesToRead = min(bytesToRead, size - lr)
+                # add two bytes to the requested size to cover for any bytes
+                # dropped by universal newline support; this is a heuristic:
+                # several '\r\n' pairs close together can still make read()
+                # return fewer bytes than requested before EOF
+                bytesToRead = min(bytesToRead, size + 2 - lr)
             elif self.compress_type == ZIP_DEFLATED:
-                if len(self.readbuffer) > size:
+                if len(self.readbuffer) > size + 2:
                     # the user has requested fewer bytes than we've already
                     # pulled through the decompressor; don't read any more
                     bytesToRead = 0
@@ -633,6 +631,11 @@
 
+                # translate only the newly read data: the rest of
+                # readbuffer was translated by an earlier call and must
+                # not be examined again
+                if newdata and self.univ_newlines:
+                    newdata = self._translate_newlines(newdata)
                 self.readbuffer += newdata
 
 
         # return what the user asked for
         if size is None or len(self.readbuffer) <= size:
@@ -644,6 +647,56 @@
 
         return bytes
 
+    def _translate_newlines(self, data):
+        """Translate the line endings of newly read data to newlines.
+
+        data must be non-empty and must not have been translated yet.
+        The kinds of line ending found are recorded in self.newlines
+        (PEP 278).  Returns the translated string.
+        """
+        # a '\r' that ended the previous chunk has already been returned
+        # as '\n'; a '\n' at the start of this chunk is the second half
+        # of that pair and has to be dropped
+        if self.lastdiscard == '\r':
+            if data.startswith('\n'):
+                data = data[1:]
+                self._add_separator_to_newlines('\r\n')
+            else:
+                self._add_separator_to_newlines('\r')
+        self.lastdiscard = None
+
+        # a trailing '\r' may be the first half of a '\r\n' pair, so it
+        # is only classified once the next chunk arrives
+        body = data
+        if data.endswith('\r'):
+            self.lastdiscard = '\r'
+            body = data[:-1]
+
+        # classify the raw data, before translation adds more '\n's
+        if '\r\n' in body:
+            self._add_separator_to_newlines('\r\n')
+            body = body.replace('\r\n', '')
+        for sep in ('\r', '\n'):
+            if sep in body:
+                self._add_separator_to_newlines(sep)
+
+        return data.replace('\r\n', '\n').replace('\r', '\n')
+
+    def _add_separator_to_newlines(self, sep):
+        """Record sep in self.newlines.
+
+        self.newlines is None, a single separator string, or a tuple of
+        the separator strings seen so far.
+        """
+        if not self.newlines:
+            self.newlines = sep
+        elif isinstance(self.newlines, tuple):
+            if sep not in self.newlines:
+                self.newlines += (sep,)
+        elif sep != self.newlines:
+            # compare whole strings: '\r' is a substring of '\r\n' but a
+            # different line ending
+            self.newlines = (self.newlines, sep)
 
 class ZipFile:
     """ Class with methods to open, read, write, close, list zip files.
Index: Lib/test/test_zipfile.py
===================================================================
--- Lib/test/test_zipfile.py	(revision 74552)
+++ Lib/test/test_zipfile.py	(working copy)
@@ -1087,10 +1087,33 @@
         zipfp = zipfile.ZipFile(f, "r")
         for sep, fn in self.arcfiles.items():
             zipdata = zipfp.open(fn, "rU").read()
-            self.assertEqual(self.arcdata[sep], zipdata)
+            self.assertEqual(self.arcdata[sep].replace(sep, "\n"), zipdata)
 
         zipfp.close()
 
+    def read_n_test(self, f, compression):
+        self.make_test_archive(f, compression)
+
+        zipfp = zipfile.ZipFile(f, "r")
+        for sep, fn in self.arcfiles.items():
+            expected = self.arcdata[sep].replace(sep, "\n")
+            for to_read in range(2, 16, 2):
+                # reopen the member for every chunk size so that each
+                # pass starts at the beginning of the data
+                rfile = zipfp.open(fn, "rU")
+                chunks = []
+                chunk = rfile.read(to_read)
+                while chunk:
+                    chunks.append(chunk)
+                    chunk = rfile.read(to_read)
+                rfile.close()
+                # only the last chunk (at EOF) may be shorter than requested
+                for chunk in chunks[:-1]:
+                    self.assertEqual(to_read, len(chunk))
+                self.assertEqual(expected, "".join(chunks))
+
+        zipfp.close()
+
     def readline_test(self, f, compression):
         self.make_test_archive(f, compression)
 
@@ -1143,6 +1166,27 @@
         for f in (TESTFN2, TemporaryFile(), StringIO()):
             self.iterlines_test(f, zipfile.ZIP_STORED)
 
+    def test_read_n(self):
+        """Test that read(nbytes) returns the requested number of bytes
+        except at EOF.
+
+        """
+        for f in (TESTFN2, TemporaryFile(), StringIO()):
+            self.read_n_test(f, zipfile.ZIP_STORED)
+
+    def test_newlines_att(self):
+        """Test that the newlines attribute is set correctly per PEP 278."""
+        for f in (TESTFN2, TemporaryFile(), StringIO()):
+            self.make_test_archive(f, zipfile.ZIP_STORED)
+            zipfp = zipfile.ZipFile(f, "r")
+            for sep, fn in self.arcfiles.items():
+                zipopen = zipfp.open(fn, "rU")
+                zipopen.read()
+                # each member is expected to use only its own separator
+                self.assertEqual(sep, zipopen.newlines)
+                zipopen.close()
+            zipfp.close()
+
     @skipUnless(zlib, "requires zlib")
     def test_read_deflated(self):
         for f in (TESTFN2, TemporaryFile(), StringIO()):