Index: Lib/zipfile.py =================================================================== --- Lib/zipfile.py (revision 74552) +++ Lib/zipfile.py (working copy) @@ -482,6 +482,7 @@ def set_univ_newlines(self, univ_newlines): self.univ_newlines = univ_newlines + self.newlines = None # pick line separator char(s) based on universal newlines flag self.nlSeps = ("\n", ) @@ -504,11 +505,6 @@ def _checkfornewline(self): nl, nllen = -1, -1 if self.linebuffer: - # ugly check for cases where half of an \r\n pair was - # read on the last pass, and the \r was discarded. In this - # case we just throw away the \n at the start of the buffer. - if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'): - self.linebuffer = self.linebuffer[1:] for sep in self.nlSeps: nl = self.linebuffer.find(sep) @@ -553,9 +549,7 @@ s = self.linebuffer self.linebuffer = '' return s - buf = self.linebuffer[:nl] - self.lastdiscard = self.linebuffer[nl:nl + nllen] self.linebuffer = self.linebuffer[nl + nllen:] # line is always returned with \n as newline char (except possibly @@ -574,6 +568,20 @@ return result def read(self, size = None): + bytes = self._do_read(size) + if size and self.univ_newlines: + # If there are n \r\n newlines in the file, _do_read will return + # size - n bytes. To deal with this, we keep calling do_read until + # we get the number of requested bytes or hit EOF + while size > len(bytes): + morebytes = self._do_read(size - len(bytes)) + if morebytes: + bytes += morebytes + else: + break + return bytes + + def _do_read(self, size = None): # act like file() obj and return empty string if size is 0 if size == 0: return '' @@ -633,9 +641,11 @@ self.readbuffer += newdata + if self.readbuffer and self.univ_newlines: + self._handle_universal_newlines(size) # return what the user asked for - if size is None or len(self.readbuffer) <= size: + if size is None or size < 0 or len(self.readbuffer) <= size: bytes = self.readbuffer self.readbuffer = '' else: @@ -644,7 +654,44 @@ return bytes + def _handle_universal_newlines(self, size): + # ugly check for cases where half of an \r\n pair was + # read on the last pass, and the \r was discarded. In this + # case we just throw away the \n at the start of the buffer. + if (self.lastdiscard, self.readbuffer[0]) == ('\r', '\n'): + self.readbuffer = self.readbuffer[1:] + self._add_separator_to_newlines("\r\n") + if self.readbuffer and self.readbuffer[-1] == '\r': + self.lastdiscard = self.readbuffer[-1] + else: + self.lastdiscard = None + # PEP 278 - set the newlines attribute + if size < 0: + size = None + crlf = self.readbuffer[:size].count('\r\n') + cr = self.readbuffer[:size].count('\r') - crlf + lf = self.readbuffer[:size].count('\n') - crlf + if crlf: + self._add_separator_to_newlines('\r\n') + if cr: + self._add_separator_to_newlines('\r') + if lf: + self._add_separator_to_newlines('\n') + + for sep in self.nlSeps: + if (sep != '\n'): + self.readbuffer = self.readbuffer.replace(sep, '\n') + + def _add_separator_to_newlines(self, sep): + if not self.newlines: + self.newlines = sep + elif sep not in self.newlines: + if type(self.newlines) is str: + self.newlines = (sep, self.newlines) + else: + self.newlines += (sep,) + class ZipFile: """ Class with methods to open, read, write, close, list zip files. Index: Lib/test/test_zipfile.py =================================================================== --- Lib/test/test_zipfile.py (revision 74552) +++ Lib/test/test_zipfile.py (working copy) @@ -1073,11 +1073,19 @@ self.arcfiles[s] = '%s-%d' % (TESTFN, n) open(self.arcfiles[s], "wb").write(self.arcdata[s]) + def mixed_generator(): + for n, line in enumerate(self.line_gen): + yield line + self.seps[n % len(self.seps)] + self.mixeddata = ''.join([line for line in mixed_generator()]) + self.mixedfn = "%s-mixed" % (TESTFN,) + open(self.mixedfn, "wb").write(self.mixeddata) + def make_test_archive(self, f, compression): # Create the ZIP archive zipfp = zipfile.ZipFile(f, "w", compression) for fn in self.arcfiles.values(): zipfp.write(fn, fn) + zipfp.write(self.mixedfn, self.mixedfn) zipfp.close() def read_test(self, f, compression): @@ -1087,10 +1095,37 @@ zipfp = zipfile.ZipFile(f, "r") for sep, fn in self.arcfiles.items(): zipdata = zipfp.open(fn, "rU").read() - self.assertEqual(self.arcdata[sep], zipdata) + self.assertEqual(self.arcdata[sep].replace(sep, "\n"), zipdata) + zipdata = zipfp.open(self.mixedfn, "rU").read() + + mixeddata = self.mixeddata.replace('\r\n', '\n').replace('\r', '\n') + self.assertEqual(mixeddata, zipdata) zipfp.close() + def check_read_n(self, f): + for to_read in range(2, 1025, 128): + bytes = f.read(to_read) + while bytes: + # make sure we got the number of bytes we requested (unless + # we're at EOF) + if to_read != len(bytes): + if f.read(to_read): + self.assertEqual(to_read, len(bytes)) + bytes = f.read(to_read) + + + def read_n_test(self, f, compression): + self.make_test_archive(f, compression) + + zipfp = zipfile.ZipFile(f, "r") + for sep, fn in self.arcfiles.items(): + rfile = zipfp.open(fn, "rU") + self.check_read_n(rfile) + + rfile = zipfp.open(self.mixedfn, "rU") + self.check_read_n(rfile) + def readline_test(self, f, compression): self.make_test_archive(f, compression) @@ -1143,6 +1178,93 @@ for f in (TESTFN2, TemporaryFile(), StringIO()): self.iterlines_test(f, zipfile.ZIP_STORED) + def test_read_n(self): + """Test that read(nbytes) returns the requested number of bytes + except at EOF. + + """ + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.read_n_test(f, zipfile.ZIP_STORED) + + def test_newlines_attr_mixed(self): + """Test that the newlines attribute is set correctly per PEP 278 for + files with mixed eols. + + """ + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.make_test_archive(f, zipfile.ZIP_STORED) + zipfp = zipfile.ZipFile(f, "r") + zipopen = zipfp.open(self.mixedfn, "rU") + zipopen.read() + self.assert_('\n' in zipopen.newlines and + '\r\n' in zipopen.newlines and + '\r' in zipopen.newlines) + zipopen.close() + + # check to make sure a read(n) that's long enough to get all + # three eols does what we expect + zipopen = zipfp.open(self.mixedfn, "rU") + zipopen.read(512) + self.assert_('\n' in zipopen.newlines and + '\r\n' in zipopen.newlines and + '\r' in zipopen.newlines) + zipopen.close() + + zipopen = zipfp.open(self.mixedfn, "rU") + zipopen.readline() + zipopen.readline() + self.assertEquals(len(zipopen.newlines), 2) + self.assert_('\n' in zipopen.newlines and + '\r\n' in zipopen.newlines) + zipopen.close() + zipfp.close() + + def test_newlines_attr_unmixed(self): + """Test that the newlines attribute is set correctly per PEP 278 for + files with unmixed eols. + + """ + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.make_test_archive(f, zipfile.ZIP_STORED) + zipfp = zipfile.ZipFile(f, "r") + for sep, fn in self.arcfiles.items(): + zipopen = zipfp.open(fn, "rU") + zipopen.read() + # for these files, there should only be one separator + # (i.e. \n or \r or \r\n) + self.assertEquals(sep, zipopen.newlines) + zipopen.close() + + def test_crlf_size(self): + """Testing read(size) with crlf""" + # like test_read_n, but with a file with nothing but crlf + filename = "%s-crlf" % (TESTFN,) + testfile = open(filename, "w") + testfile.write('\r\n' * 128) + testfile.close() + zipfp = zipfile.ZipFile(TESTFN2, "a") + zipfp.write(filename, filename) + zipopen = zipfp.open(filename, "rU") + data = zipopen.read(60) + self.assertEquals(data, '\n' * 60) + zipopen.close() + zipfp.close() + os.remove(TESTFN2) + + def test_read_negative(self): + """Test that read(-1) acts as expected. + + read(-1), read(None) and read() should all return the remainder of the + file. + + """ + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.make_test_archive(f, zipfile.ZIP_STORED) + zipfp = zipfile.ZipFile(f, "r") + for sep, fn in self.arcfiles.items(): + zipdata = zipfp.open(fn, "rU").read(-1) + self.assertEqual(self.arcdata[sep].replace(sep, "\n"), zipdata) + @skipUnless(zlib, "requires zlib") def test_read_deflated(self): for f in (TESTFN2, TemporaryFile(), StringIO()): @@ -1168,6 +1290,7 @@ os.remove(fn) unlink(TESTFN) unlink(TESTFN2) + os.remove(self.mixedfn) def test_main():