Index: Lib/zipfile.py =================================================================== --- Lib/zipfile.py (revision 74552) +++ Lib/zipfile.py (working copy) @@ -482,6 +482,7 @@ def set_univ_newlines(self, univ_newlines): self.univ_newlines = univ_newlines + self.newlines = None # pick line separator char(s) based on universal newlines flag self.nlSeps = ("\n", ) @@ -504,11 +505,6 @@ def _checkfornewline(self): nl, nllen = -1, -1 if self.linebuffer: - # ugly check for cases where half of an \r\n pair was - # read on the last pass, and the \r was discarded. In this - # case we just throw away the \n at the start of the buffer. - if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'): - self.linebuffer = self.linebuffer[1:] for sep in self.nlSeps: nl = self.linebuffer.find(sep) @@ -553,9 +549,7 @@ s = self.linebuffer self.linebuffer = '' return s - buf = self.linebuffer[:nl] - self.lastdiscard = self.linebuffer[nl:nl + nllen] self.linebuffer = self.linebuffer[nl + nllen:] # line is always returned with \n as newline char (except possibly @@ -574,6 +568,20 @@ return result def read(self, size = None): + bytes = self._do_read(size) + if size and self.univ_newlines: + # If there are n \r\n newlines in the file, _do_read will return + # size - n bytes. To deal with this, we keep calling do_read until + # we get the number of requested bytes or hit EOF + while size > len(bytes): + morebytes = self._do_read(size - len(bytes)) + if morebytes: + bytes += morebytes + else: + break + return bytes + + def _do_read(self, size = None): # act like file() obj and return empty string if size is 0 if size == 0: return '' @@ -633,7 +641,29 @@ self.readbuffer += newdata + if self.readbuffer and self.univ_newlines: + # ugly check for cases where half of an \r\n pair was + # read on the last pass, and the \r was discarded. In this + # case we just throw away the \n at the start of the buffer. + if (self.lastdiscard, self.readbuffer[0]) == ('\r', '\n'): + self.readbuffer = self.readbuffer[1:] + self._add_separator_to_newlines("\r\n") + if self.readbuffer and self.readbuffer[-1] == '\r': + self.lastdiscard = self.readbuffer[-1] + else: + self.lastdiscard = None + + for sep in self.nlSeps: + # PEP 278 - set the newlines attribute + if not self.newlines or sep not in self.newlines: + if sep in self.readbuffer: + self._add_separator_to_newlines(sep) + break + for sep in self.nlSeps: + if (sep != '\n'): + self.readbuffer = self.readbuffer.replace(sep, '\n') + # return what the user asked for if size is None or len(self.readbuffer) <= size: bytes = self.readbuffer @@ -644,6 +674,14 @@ return bytes + def _add_separator_to_newlines(self, sep): + if not self.newlines: + self.newlines = sep + elif sep not in self.newlines: + if type(self.newlines) is str: + self.newlines = (sep, self.newlines) + else: + self.newlines += (sep,) class ZipFile: """ Class with methods to open, read, write, close, list zip files. Index: Lib/test/test_zipfile.py =================================================================== --- Lib/test/test_zipfile.py (revision 74552) +++ Lib/test/test_zipfile.py (working copy) @@ -1073,11 +1073,19 @@ self.arcfiles[s] = '%s-%d' % (TESTFN, n) open(self.arcfiles[s], "wb").write(self.arcdata[s]) + def mixed_generator(): + for n, line in enumerate(self.line_gen): + yield line + self.seps[n % len(self.seps)] + self.mixeddata = ''.join([line for line in mixed_generator()]) + self.mixedfn = "%s-mixed" % (TESTFN,) + open(self.mixedfn, "wb").write(self.mixeddata) + def make_test_archive(self, f, compression): # Create the ZIP archive zipfp = zipfile.ZipFile(f, "w", compression) for fn in self.arcfiles.values(): zipfp.write(fn, fn) + zipfp.write(self.mixedfn, self.mixedfn) zipfp.close() def read_test(self, f, compression): @@ -1087,10 +1095,37 @@ zipfp = zipfile.ZipFile(f, "r") for sep, fn in self.arcfiles.items(): zipdata = zipfp.open(fn, "rU").read() - self.assertEqual(self.arcdata[sep], zipdata) + self.assertEqual(self.arcdata[sep].replace(sep, "\n"), zipdata) + zipdata = zipfp.open(self.mixedfn, "rU").read() + + mixeddata = self.mixeddata.replace('\r\n', '\n').replace('\r', '\n') + self.assertEqual(mixeddata, zipdata) zipfp.close() + def check_read_n(self, f): + for to_read in range(2, 1025, 128): + bytes = f.read(to_read) + while bytes: + # make sure we got the number of bytes we requested (unless + # we're at EOF) + if to_read != len(bytes): + if f.read(to_read): + self.assertEqual(to_read, len(bytes)) + bytes = f.read(to_read) + + + def read_n_test(self, f, compression): + self.make_test_archive(f, compression) + + zipfp = zipfile.ZipFile(f, "r") + for sep, fn in self.arcfiles.items(): + rfile = zipfp.open(fn, "rU") + self.check_read_n(rfile) + + rfile = zipfp.open(self.mixedfn, "rU") + self.check_read_n(rfile) + def readline_test(self, f, compression): self.make_test_archive(f, compression) @@ -1143,6 +1178,34 @@ for f in (TESTFN2, TemporaryFile(), StringIO()): self.iterlines_test(f, zipfile.ZIP_STORED) + def test_read_n(self): + """Test that read(nbytes) returns the requested number of bytes + except at EOF. + + """ + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.read_n_test(f, zipfile.ZIP_STORED) + + def test_newlines_attr(self): + """Test that the newlines attribute is set correctly per PEP 278.""" + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.make_test_archive(f, zipfile.ZIP_STORED) + zipfp = zipfile.ZipFile(f, "r") + for sep, fn in self.arcfiles.items(): + zipopen = zipfp.open(fn, "rU") + zipopen.read() + # for these files, there should only be one separator + # (i.e. \n or \r or \r\n) + self.assertEquals(sep, zipopen.newlines) + zipopen.close() + + zipopen = zipfp.open("%s-mixed" % (TESTFN,), "rU") + zipopen.read() + self.assert_('\n' in zipopen.newlines and + '\r\n' in zipopen.newlines and + '\r' in zipopen.newlines) + zipfp.close() + @skipUnless(zlib, "requires zlib") def test_read_deflated(self): for f in (TESTFN2, TemporaryFile(), StringIO()): @@ -1168,6 +1231,7 @@ os.remove(fn) unlink(TESTFN) unlink(TESTFN2) + os.remove(self.mixedfn) def test_main():