diff -urN old/Lib/shlex.py new/Lib/shlex.py --- old/Lib/shlex.py 2009-12-29 00:02:14.000000000 -0800 +++ new/Lib/shlex.py 2009-12-29 00:02:04.000000000 -0800 @@ -6,6 +6,8 @@ # push_source() and pop_source() made explicit by ESR, January 2001. # Posix compliance, split(), string arguments, and # iterator interface by Gustavo Niemeyer, April 2003. +# conversion to generator, mass optimization and cleanup +# by Brian Harring, December 2009 import os.path import sys @@ -18,28 +20,58 @@ __all__ = ["shlex", "split"] + +class stream_source(object): + + def __init__(self, stream, filename=None): + self.stream = stream + self.lineno = 1 + self.eof = False + self.filename = filename + + def readline(self): + data = self.stream.readline() + self.lineno += 1 + return data + + def __iter__(self): + if self.stream is None: + raise StopIteration() + read = self.stream.read + data = read(1) + while data: + if data in '\n': + self.lineno += 1 + yield data + data = read(1) + self.close() + + def close(self): + self.eof = True + self.stream = None + + def __str__(self): + if self.filename: + return "streamed filename %r" % (self.filename,) + return "stream %r" % (self.stream,) + + class shlex: "A lexical analyzer class for simple shell-like syntaxes." 
- def __init__(self, instream=None, infile=None, posix=False): - if isinstance(instream, basestring): - instream = StringIO(instream) - if instream is not None: - self.instream = instream - self.infile = infile - else: - self.instream = sys.stdin - self.infile = None + def __init__(self, instream=None, infile=None, posix=False, + debug=0): self.posix = posix if posix: self.eof = None else: self.eof = '' self.commenters = '#' - self.wordchars = ('abcdfeghijklmnopqrstuvwxyz' - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_') + self._wordchars = set(('abcdfeghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')) if self.posix: - self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ' + self._wordchars.update('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ' 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ') + self.whitespace = ' \t\r\n' self.whitespace_split = False self.quotes = '\'"' @@ -47,57 +79,81 @@ self.escapedquotes = '"' self.state = ' ' self.pushback = deque() - self.lineno = 1 - self.debug = 0 - self.token = '' + self.debug = debug self.filestack = deque() self.source = None + if self.debug: - print 'shlex: reading from %s, line %d' \ - % (self.instream, self.lineno) + self.emit_debug('reading from %s, line %d', + (instream, 1,)) + + self.instream = None + self.token_stream = None + self.push_source(instream, newfile=infile) + + @property + def infile(self): + return self.instream.filename + + @property + def lineno(self): + return self.instream.lineno + + def emit_debug(self, format, args=(), level=1): + if self.debug >= level: + print "shlex: " + (format % args) + + + # purely for backwards compatibility... annoying. 
+ def _set_wordchars(self, arg): + self._wordchars = set(arg) + + def _get_wordchars(self): + return ''.join(sorted(self._wordchars)) + + wordchars = property(_get_wordchars, _set_wordchars) def push_token(self, tok): "Push a token onto the stack popped by the get_token method" - if self.debug >= 1: - print "shlex: pushing token " + repr(tok) + if self.debug: + self.emit_debug("pushing token %r", (tok,)) self.pushback.appendleft(tok) def push_source(self, newstream, newfile=None): "Push an input source onto the lexer's input source stack." if isinstance(newstream, basestring): newstream = StringIO(newstream) - self.filestack.appendleft((self.infile, self.instream, self.lineno)) - self.infile = newfile - self.instream = newstream - self.lineno = 1 + if self.instream: + self.filestack.appendleft((self.token_stream, self.instream)) + self.instream = stream_source(newstream, filename=newfile) + self.token_stream = self.read_token_stream() if self.debug: - if newfile is not None: - print 'shlex: pushing to file %s' % (self.infile,) - else: - print 'shlex: pushing to stream %s' % (self.instream,) + self.emit_debug("pushing to %s", (self.instream,)) def pop_source(self): "Pop the input source stack." self.instream.close() - (self.infile, self.instream, self.lineno) = self.filestack.popleft() + (self.token_stream, self.instream) = self.filestack.popleft() if self.debug: - print 'shlex: popping to %s, line %d' \ - % (self.instream, self.lineno) + self.emit_debug("popping to %s, line %d", (self.instream, self.lineno)) self.state = ' ' def get_token(self): "Get a token from the input stream (or from stack if it's nonempty)" if self.pushback: tok = self.pushback.popleft() - if self.debug >= 1: - print "shlex: popping token " + repr(tok) + if self.debug: + self.emit_debug("popping token %r", (tok,)) return tok # No pushback. Get a token. 
raw = self.read_token() # Handle inclusions if self.source is not None: while raw == self.source: - spec = self.sourcehook(self.read_token()) + token = self.read_token() + if self.debug: + self.emit_debug("raw token=%r" % (token,)) + spec = self.sourcehook(token) if spec: (newfile, newstream) = spec self.push_source(newstream, newfile) @@ -106,144 +162,179 @@ while raw == self.eof: if not self.filestack: return self.eof - else: - self.pop_source() - raw = self.get_token() + self.pop_source() + raw = self.get_token() # Neither inclusion nor EOF - if self.debug >= 1: - if raw != self.eof: - print "shlex: token=" + repr(raw) - else: - print "shlex: token=EOF" + if self.debug: + self.emit_debug("token=%r", (raw == self.eof and 'EOF' or raw)) return raw def read_token(self): - quoted = False - escapedstate = ' ' - while True: - nextchar = self.instream.read(1) - if nextchar == '\n': - self.lineno = self.lineno + 1 - if self.debug >= 3: - print "shlex: in state", repr(self.state), \ - "I see character:", repr(nextchar) - if self.state is None: - self.token = '' # past end of file + try: + return self.token_stream.next() + except StopIteration: + return self.eof + + def _nonposix_parse_quote(self, stream_i, quoted, token): + token = quoted + for nextchar in stream_i: + token += nextchar + if nextchar == quoted: break - elif self.state == ' ': - if not nextchar: - self.state = None # end of file + else: + raise ValueError("No closing quotation") + # got all of it, yield it; basicall "DO" of "DO"monkey + return token, True + + def _posix_parse_quote(self, stream_i, quoted, token): + if quoted in self.escapedquotes: + escape = self.escape + try: + for nextchar in stream_i: + if nextchar in escape: + escaper = nextchar + nextchar = stream_i.next() + if nextchar != escaper and nextchar != quoted: + token += escaper + elif nextchar == quoted: + break + token += nextchar + else: + raise ValueError("No closing quotation") + except StopIteration: + # escaped char... 
+ raise ValueError("No closing quotation") + else: + for nextchar in stream_i: + if nextchar == quoted: break - elif nextchar in self.whitespace: - if self.debug >= 2: - print "shlex: I see whitespace in whitespace state" - if self.token or (self.posix and quoted): - break # emit current token - else: + token += nextchar + else: + raise ValueError("No closing quotation") + + if not self.whitespace_split: + return token, True + return token, False + + def read_token_stream(self): + if self.state is None: + return + + debug = self.debug + + instream = self.instream + whitespace = self.whitespace + commenters = self.commenters + escape = self.escape + wordchars = self._wordchars + posix = self.posix + if posix: + parse_quote = self._posix_parse_quote + else: + parse_quote = self._nonposix_parse_quote + + quotes = self.quotes + + got_input = False + + assert instream is not None + stream_i = iter(instream) + + while True: + + got_input = False + token = '' + + for nextchar in stream_i: + got_input = True + if nextchar in whitespace: + continue + + if nextchar in commenters: + # literally just like this comment here, ignore everything that follows + instream.readline() + continue + + elif posix and nextchar in escape: + try: + token += stream_i.next() + except StopIteration: + raise ValueError("No escaped character") + + elif nextchar in wordchars: + token = nextchar + + elif nextchar in quotes: + chunk, reset = parse_quote(stream_i, nextchar, token) + token += chunk + if reset: + yield token + token = '' continue - elif nextchar in self.commenters: - self.instream.readline() - self.lineno = self.lineno + 1 - elif self.posix and nextchar in self.escape: - escapedstate = 'a' - self.state = nextchar - elif nextchar in self.wordchars: - self.token = nextchar - self.state = 'a' - elif nextchar in self.quotes: - if not self.posix: - self.token = nextchar - self.state = nextchar + elif self.whitespace_split: - self.token = nextchar - self.state = 'a' + token = nextchar + 
else: - self.token = nextchar - if self.token or (self.posix and quoted): - break # emit current token - else: - continue - elif self.state in self.quotes: - quoted = True - if not nextchar: # end of file - if self.debug >= 2: - print "shlex: I see EOF in quotes state" - # XXX what error should be raised here? - raise ValueError, "No closing quotation" - if nextchar == self.state: - if not self.posix: - self.token = self.token + nextchar - self.state = ' ' + # punctuation... + yield nextchar + continue + # and... we're done processing the whitespace. + break + + # non whitespace appending + for nextchar in stream_i: + got_input = True + + if nextchar in wordchars: + token += nextchar + continue + + if nextchar in whitespace: + if debug: + self.emit_debug("I see whitespace in word state", level=2) + if token or posix: + yield token + break + + elif nextchar in commenters: + instream.readline() + if posix: + if token: + yield token + break + + elif posix and nextchar in quotes: + chunk, reset = parse_quote(stream_i, nextchar, token) + token = chunk + if reset: + yield token break - else: - self.state = 'a' - elif self.posix and nextchar in self.escape and \ - self.state in self.escapedquotes: - escapedstate = self.state - self.state = nextchar + + elif posix and nextchar in escape: + try: + token += stream_i.next() + except StopIteration: + raise ValueError("no escape character") + + elif nextchar in quotes or self.whitespace_split: + token += nextchar + else: - self.token = self.token + nextchar - elif self.state in self.escape: - if not nextchar: # end of file - if self.debug >= 2: - print "shlex: I see EOF in escape state" - # XXX what error should be raised here? - raise ValueError, "No escaped character" - # In posix shells, only the quote itself or the escape - # character may be escaped within quotes. 
- if escapedstate in self.quotes and \ - nextchar != self.state and nextchar != escapedstate: - self.token = self.token + self.state - self.token = self.token + nextchar - self.state = escapedstate - elif self.state == 'a': - if not nextchar: - self.state = None # end of file + if debug: + self.emit_debug("I see punctuation in word state", level=2) + assert token + yield token + yield nextchar # now yield the punctuation... break - elif nextchar in self.whitespace: - if self.debug >= 2: - print "shlex: I see whitespace in word state" - self.state = ' ' - if self.token or (self.posix and quoted): - break # emit current token - else: - continue - elif nextchar in self.commenters: - self.instream.readline() - self.lineno = self.lineno + 1 - if self.posix: - self.state = ' ' - if self.token or (self.posix and quoted): - break # emit current token - else: - continue - elif self.posix and nextchar in self.quotes: - self.state = nextchar - elif self.posix and nextchar in self.escape: - escapedstate = 'a' - self.state = nextchar - elif nextchar in self.wordchars or nextchar in self.quotes \ - or self.whitespace_split: - self.token = self.token + nextchar - else: - self.pushback.appendleft(nextchar) - if self.debug >= 2: - print "shlex: I see punctuation in word state" - self.state = ' ' - if self.token: - break # emit current token - else: - continue - result = self.token - self.token = '' - if self.posix and not quoted and result == '': - result = None - if self.debug > 1: - if result: - print "shlex: raw token=" + repr(result) else: - print "shlex: raw token=EOF" - return result + if not token and not got_input: + token = self.eof + yield token + break + + + self.state = None def sourcehook(self, newfile): "Hook called on a filename to be sourced." 
@@ -263,13 +354,13 @@ return "\"%s\", line %d: " % (infile, lineno) def __iter__(self): - return self + get_token = self.get_token + token = get_token() + eof = self.eof + while token != eof: + yield token + token = get_token() - def next(self): - token = self.get_token() - if token == self.eof: - raise StopIteration - return token def split(s, comments=False, posix=True): lex = shlex(s, posix=posix) @@ -280,13 +371,8 @@ if __name__ == '__main__': if len(sys.argv) == 1: - lexer = shlex() + args = [sys.stdin] else: - file = sys.argv[1] - lexer = shlex(open(file), file) - while 1: - tt = lexer.get_token() - if tt: - print "Token: " + repr(tt) - else: - break + args = [open(sys.argv[1]), sys.argv[1]] + for token in shlex(*args): + print "Token %r" % (token,) diff -urN old/Lib/test/test_shlex.py new/Lib/test/test_shlex.py --- old/Lib/test/test_shlex.py 2009-12-29 00:02:19.000000000 -0800 +++ new/Lib/test/test_shlex.py 2009-12-29 00:02:04.000000000 -0800 @@ -25,6 +25,7 @@ foo \ x bar|foo|\|x|bar| foo \ bar|foo|\|bar| foo "bar" bla|foo|"bar"|bla| +"foo" "bar"|"foo"|"bar"| "foo" "bar" "bla"|"foo"|"bar"|"bla"| "foo" bar "bla"|"foo"|bar|"bla"| "foo" bar bla|"foo"|bar|bla| @@ -139,22 +140,37 @@ áéíóú|áéíóú| """ +# data that is used for shlex direct invocation, instead of split. +# split flips on whitespace_split always... 
not great for testing the +# parser, especially consider x=\ dar # assignments +posix_data_shlex_direct = r"""x=\ dar|x|=| dar| +\ dar| dar| +dar |dar| +""" + + class ShlexTest(unittest.TestCase): def setUp(self): - self.data = [x.split("|")[:-1] - for x in data.splitlines()] - self.posix_data = [x.split("|")[:-1] - for x in posix_data.splitlines()] - for item in self.data: - item[0] = item[0].replace(r"\n", "\n") - for item in self.posix_data: - item[0] = item[0].replace(r"\n", "\n") - - def splitTest(self, data, comments): + for attr, src in (("data", data), ("posix_data", posix_data), + ("posix_data_shlex_direct", posix_data_shlex_direct)): + l = [x.split("|")[:-1] for x in src.splitlines()] + for item in l: + item[0] = item[0].replace(r"\n", "\n") + setattr(self, attr, l) + + def splitTest(self, data, comments, use_split=True): + if use_split: + f = lambda val: shlex.split(val, comments=comments) + else: + def f(val): + parser = shlex.shlex(val, posix=True) + if not comments: + parser.commenters = '' + return list(parser) for i in range(len(data)): - l = shlex.split(data[i][0], comments=comments) + l = f(data[i][0]) self.assertEqual(l, data[i][1:], - "%s: %s != %s" % + "%r: %r != %r" % (data[i][0], l, data[i][1:])) def oldSplit(self, s): @@ -170,12 +186,16 @@ """Test data splitting with posix parser""" self.splitTest(self.posix_data, comments=True) + def testPosix_unsplit(self): + self.splitTest(self.posix_data_shlex_direct, comments=True, + use_split=False) + def testCompat(self): """Test compatibility interface""" for i in range(len(self.data)): l = self.oldSplit(self.data[i][0]) self.assertEqual(l, self.data[i][1:], - "%s: %s != %s" % + "%r: %r != %r" % (self.data[i][0], l, self.data[i][1:])) # Allow this test to be used with old shlex.py