Index: shlex.py =================================================================== --- shlex.py (revision 77174) +++ shlex.py (working copy) @@ -47,6 +47,7 @@ self.escapedquotes = '"' self.state = ' ' self.pushback = deque() + self.ungetc = deque() self.lineno = 1 self.debug = 0 self.token = '' @@ -121,7 +122,10 @@ quoted = False escapedstate = ' ' while True: - nextchar = self.instream.read(1) + if self.ungetc: + nextchar = self.ungetc.popleft() + else: + nextchar = self.instream.read(1) if nextchar == '\n': self.lineno = self.lineno + 1 if self.debug >= 3: @@ -142,8 +146,12 @@ else: continue elif nextchar in self.commenters: - self.instream.readline() - self.lineno = self.lineno + 1 + remainder = self.instream.readline() + if self.posix and remainder and remainder[-1] == '\n': + # the newline is not part of the comment + # according to POSIX, + # and is not necessarily whitespace + self.ungetc.append( '\n' ) elif self.posix and nextchar in self.escape: escapedstate = 'a' self.state = nextchar @@ -209,9 +217,15 @@ else: continue elif nextchar in self.commenters: - self.instream.readline() - self.lineno = self.lineno + 1 + remainder = self.instream.readline() + if self.posix: + if remainder and remainder[-1] == '\n': + # the newline is not part of the comment + # according to POSIX, + # and is not necessarily whitespace + self.ungetc.append( '\n' ) + self.state = ' ' if self.token or (self.posix and quoted): break # emit current token Index: test/test_shlex.py =================================================================== --- test/test_shlex.py (revision 77174) +++ test/test_shlex.py (working copy) @@ -139,16 +139,30 @@ αινσϊ|αινσϊ| """ +posix_newline_data = r"""foo\nbar|foo|\n|bar| +foo\n|foo|\n| +foo#bar|foo| +foo#bar\nbaz|foo|\n|baz| +foo #bar\nbaz|foo|\n|baz| +foo\\nbar|foo\nbar| +'foo\nbar'|foo\nbar| +""" + class ShlexTest(unittest.TestCase): def setUp(self): self.data = [x.split("|")[:-1] for x in data.splitlines()] self.posix_data = [x.split("|")[:-1] for x in posix_data.splitlines()] + self.posix_newline_data = [x.split("|")[:-1] + for x in posix_newline_data.splitlines()] for item in self.data: item[0] = item[0].replace(r"\n", "\n") for item in self.posix_data: item[0] = item[0].replace(r"\n", "\n") + for item in self.posix_newline_data: + for x in range(len(item)): + item[x] = item[x].replace(r"\n", "\n") def splitTest(self, data, comments): for i in range(len(data)): @@ -178,6 +192,27 @@ "%s: %s != %s" % (self.data[i][0], l, self.data[i][1:])) + def testNewlineTokens(self): + """Test handling newlines as tokens""" + def splitter(s): + ret = [] + lex = shlex.shlex(StringIO(s),posix=True) + lex.whitespace=" \r\t" + tok = lex.get_token() + while tok: + ret.append(tok) + tok = lex.get_token() + return ret + + data = self.posix_newline_data + + for i in range(len(data)): + l = splitter(data[i][0]) + self.assertEqual(l, data[i][1:], + "%s: %s != %s" % + (data[i][0], l, data[i][1:])) + + # Allow this test to be used with old shlex.py if not getattr(shlex, "split", None): for methname in dir(ShlexTest):