diff -r 1141648fa655 Doc/library/tokenize.rst
--- a/Doc/library/tokenize.rst	Sat Oct 06 13:49:34 2012 +0200
+++ b/Doc/library/tokenize.rst	Sat Oct 06 14:51:00 2012 +0200
@@ -86,14 +86,17 @@ write back the modified script.
 .. function:: untokenize(iterable)
 
     Converts tokens back into Python source code. The *iterable* must return
-    sequences with at least two elements, the token type and the token string.
-    Any additional sequence elements are ignored.
+    sequences with either two or five elements: the token type and the token
+    string, optionally followed by the source location as in the result of
+    :func:`tokenize`.
 
     The reconstructed script is returned as a single string. The result is
     guaranteed to tokenize back to match the input so that the conversion is
     lossless and round-trips are assured. The guarantee applies only to the
-    token type and token string as the spacing between tokens (column
-    positions) may change.
+    token type and token string as the spacing between tokens (column positions)
+    may change. If the iterable provides five elements per token, whitespace
+    in the output will be much closer to the original, but some constructs (like
+    line continuations with backslashes) will not be recovered literally.
 
     It returns bytes, encoded using the ENCODING token, which is the first
     token sequence output by :func:`tokenize`.
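
For illustration only (not part of the patch), here is a minimal sketch of the two calling modes the documentation change above describes. The source string and variable names are made up, and it assumes the patched module:

    # Hypothetical example, assuming the patched tokenize module.
    from io import BytesIO
    from tokenize import tokenize, untokenize

    source = b"if x == 1:\n    print(x)\n"
    tokens = list(tokenize(BytesIO(source).readline))

    # Five-element tuples: whitespace in the output stays close to the input.
    full = untokenize(tokens)

    # Two-element tuples ("compat" mode): spacing may differ, but the result
    # still tokenizes back to the same (type, string) pairs.
    limited = untokenize([tok[:2] for tok in tokens])
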
diff -r 1141648fa655 Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py	Sat Oct 06 13:49:34 2012 +0200
+++ b/Lib/test/test_tokenize.py	Sat Oct 06 14:51:00 2012 +0200
@@ -662,18 +662,29 @@ def roundtrip(f):
     The source code in f is tokenized, converted back to source code via
     tokenize.untokenize(), and tokenized again from the latter. The test
     fails if the second tokenization doesn't match the first.
+
+    We also check that the argument to untokenize() can be an iterator,
+    see bug #8478.
     """
     if isinstance(f, str):
         f = BytesIO(f.encode('utf-8'))
+    code = f.read()
+    readline = (line for line in code.splitlines(keepends=True)).__next__
     try:
-        token_list = list(tokenize(f.readline))
+        token_list = list(tokenize(readline))
     finally:
         f.close()
+    # "compat" mode with only two-element tuples
     tokens1 = [tok[:2] for tok in token_list]
-    new_bytes = untokenize(tokens1)
+    tokeniter = iter(tokens1)
+    new_bytes = untokenize(tokeniter)
     readline = (line for line in new_bytes.splitlines(keepends=True)).__next__
     tokens2 = [tok[:2] for tok in tokenize(readline)]
-    return tokens1 == tokens2
+    # new mode with (more or less) correct whitespace handling
+    new_bytes2 = untokenize(iter(token_list))
+    readline = (line for line in new_bytes2.splitlines(keepends=True)).__next__
+    tokens3 = [tok[:2] for tok in tokenize(readline)]
+    return tokens1 == tokens2 == tokens3
 
 # This is an example from the docs, set up as a doctest.
 def decistmt(s):
diff -r 1141648fa655 Lib/tokenize.py
--- a/Lib/tokenize.py	Sat Oct 06 13:49:34 2012 +0200
+++ b/Lib/tokenize.py	Sat Oct 06 14:51:00 2012 +0200
@@ -31,6 +31,7 @@ from token import *
 from codecs import lookup, BOM_UTF8
 import collections
 from io import TextIOWrapper
+from itertools import chain
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
@@ -228,15 +229,18 @@ class Untokenizer:
 
     def add_whitespace(self, start):
         row, col = start
-        assert row <= self.prev_row
+        assert row >= self.prev_row
        col_offset = col - self.prev_col
         if col_offset:
             self.tokens.append(" " * col_offset)
 
     def untokenize(self, iterable):
-        for t in iterable:
+        it = iter(iterable)
+        for t in it:
             if len(t) == 2:
-                self.compat(t, iterable)
+                # we've already consumed the first element of the iterator,
+                # so have to supply it back here
+                self.compat(t, chain([t], it))
                 break
             tok_type, token, start, end, line = t
             if tok_type == ENCODING:
@@ -299,10 +303,11 @@ def untokenize(iterable):
 
     Each element returned by the iterable must be a token sequence
     with at least two elements, a token number and token value.  If
-    only two tokens are passed, the resulting output is poor.
+    only two elements are passed, the resulting output is poor.
 
     Round-trip invariant for full input:
-        Untokenized source will match input source exactly
+        Untokenized source will match input source up to backslash
+        continuations
 
     Round-trip invariant for limited intput:
         # Output bytes will tokenize the back to the input
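
For completeness, a minimal sketch (also not part of the patch) of the round-trip exercised by the new test, passing an iterator as in bug #8478. Names are illustrative and it assumes the patched module:

    # Hypothetical example, assuming the patched tokenize module.
    from io import BytesIO
    from tokenize import tokenize, untokenize

    source = b"print('hello')\n"
    token_list = list(tokenize(BytesIO(source).readline))
    tokens1 = [tok[:2] for tok in token_list]

    # Passing an iterator rather than a list: the loop in
    # Untokenizer.untokenize() has already consumed the first element
    # before delegating to compat(), so the patch feeds it back via
    # chain([t], it).
    new_bytes = untokenize(iter(tokens1))

    readline = iter(new_bytes.splitlines(keepends=True)).__next__
    tokens2 = [tok[:2] for tok in tokenize(readline)]
    assert tokens1 == tokens2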