diff -r 6d278f426417 Doc/library/tokenize.rst
--- a/Doc/library/tokenize.rst Fri Jul 05 18:05:29 2013 -1000
+++ b/Doc/library/tokenize.rst Sun Jul 07 13:18:24 2013 +0200
@@ -87,7 +87,10 @@
 
    Converts tokens back into Python source code.  The *iterable* must return
    sequences with at least two elements, the token type and the token string.
-   Any additional sequence elements are ignored.
+   If five elements are present, it is assumed that they are formatted like
+   the output of :func:`tokenize` and the additional elements are used to
+   restore the original whitespace.  Otherwise, any additional sequence
+   elements are ignored.
 
    The reconstructed script is returned as a single string.  The result is
    guaranteed to tokenize back to match the input so that the conversion is
@@ -96,7 +99,8 @@
    positions) may change.
 
    It returns bytes, encoded using the ENCODING token, which is the first
-   token sequence output by :func:`tokenize`.
+   token sequence output by :func:`tokenize`.  If the ENCODING token is not
+   present, the output is encoded using 'utf-8'.
 
 
 :func:`tokenize` needs to detect the encoding of source files it tokenizes.  The
diff -r 6d278f426417 Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py Fri Jul 05 18:05:29 2013 -1000
+++ b/Lib/test/test_tokenize.py Sun Jul 07 13:18:24 2013 +0200
@@ -641,7 +641,7 @@
                      open as tokenize_open)
 from io import BytesIO
 from unittest import TestCase
-import os, sys, glob
+import os, re, sys, glob
 import token
 
 def dump_tokens(s):
@@ -1113,7 +1113,139 @@
         # See http://bugs.python.org/issue16152
         self.assertExactTypeEqual('@ ', token.AT)
 
-__test__ = {"doctests" : doctests, 'decistmt': decistmt}
+
+class TestUntokenize(TestCase):
+
+    def get_tokens(self, source):
+        self.assertIsInstance(source, bytes)
+        readline = BytesIO(source).readline
+        tokens = tokenize(readline)
+        return tokens
+
+    def get_tokens_compat(self, source, tuple_elements):
+        tokens = self.get_tokens(source)
+        return (token[:tuple_elements] for token in tokens)
+
+    def checkUntokenize(self, input_, tuple_elements=None, pattern=None,
+                        drop_enc=False):
+        tokens = self.get_tokens(input_)
+        if tuple_elements is not None:
+            tokens = (tuple_elements(token) for token in tokens)
+        if drop_enc:  # Skip the ENCODING token.
+            encoding_token = next(tokens)
+            self.assertEqual(encoding_token[0], ENCODING)
+            self.assertEqual(encoding_token[1], 'utf-8')
+        source = untokenize(tokens)
+        self.assertIsInstance(source, bytes)
+        # The 'compat' implementation (2-element tuples as input) produces
+        # different whitespace than the original source file, so a regexp
+        # is used to compare the output.
+        if pattern is None:
+            self.assertEqual(source, input_)
+        else:
+            self.assertRegex(source.decode('utf-8'), pattern)
+
+    def assertUntokenizeResult(self, input_, pattern):
+        # Run against original tokens.
+        self.checkUntokenize(input_)
+        self.checkUntokenize(input_, drop_enc=True)
+
+        # Run against 2-element tokens.
+        tuple_elements = lambda token: token[:2]
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern)
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern, drop_enc=True)
+
+        # Run against tokens of unexpected length.
+        tuple_elements = lambda token: token[:2] + (None, )
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern)
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern, drop_enc=True)
+
+        tuple_elements = lambda token: token[:2] + (None, ) * 2
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern)
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern, drop_enc=True)
+
+        tuple_elements = lambda token: token[:2] + (None, ) * 4
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern)
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern, drop_enc=True)
+
+        tuple_elements = lambda token: token[:2] + (None, ) * 5
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern)
+        self.checkUntokenize(input_, tuple_elements=tuple_elements,
+                             pattern=pattern, drop_enc=True)
+
+    def test_untokenize_simple(self):
+        input_ = b"1 + 2"
+        pattern = re.compile(r'^1\ ?\+\ ?2\ ?$')
+        self.assertUntokenizeResult(input_, pattern)
+
+    def test_untokenize_medium(self):
+        input_ = (
+            b"def div(a, b):\n"
+            b"    if b == 0:\n"
+            b"        return None\n"
+            b"    else:\n"
+            b"        return a / b\n")
+        pattern = re.compile(
+            r"^def div\ ?\(a\ ?,\ ?b\ ?\):\n"
+            r"    if b\ ?==\ ?0\ ?:\n"
+            r"        return None\ ?\n"
+            r"    else\ ?:\n"
+            r"        return a\ ?/\ ?b\ ?\n$")
+        self.assertUntokenizeResult(input_, pattern)
+
+    def test_untokenize_complex(self):
+        input_ = (
+            b'#!/usr/bin/env python3\n'
+            b'# -*- coding: utf-8 -*-\n'
+            b'import functools\n'
+            b'\n'
+            b'class Example:\n'
+            b'    """docstring"""\n'
+            b'    def __init__(self, prop):\n'
+            b'        self.prop = prop\n'
+            b'\n'
+            b'    def __call__(self, func):\n'
+            b'        @functools.wraps(func)\n'
+            b'        def wrapper(param):\n'
+            b'            # Comment.\n'
+            b'            if param:\n'
+            b'                return param\n'
+            b'            else:\n'
+            b'                return -1\n'
+            b'        return wrapper\n')
+        pattern = re.compile(
+            r'^#!/usr/bin/env python3\n'
+            r'# -\*- coding: utf-8 -\*-\n'
+            r'import functools\ ?\n'
+            r'\n'
+            r'class Example\ ?:\n'
+            r'    """docstring"""\n'
+            r'    def __init__\ ?\(self\ ?,\ ?prop\ ?\):\n'
+            r'        self\ ?\.prop\ ?=\ ?prop\ ?\n'
+            r'\n'
+            r'    def __call__\ ?\(self\ ?,\ ?func\ ?\):\n'
+            r'        @functools\ ?\.wraps\ ?\(func\ ?\)\n'
+            r'        def wrapper\ ?\(param\ ?\):\n'
+            r'\ *# Comment\.\n'
+            r'            if param\ ?:\n'
+            r'                return param\ ?\n'
+            r'            else\ ?:\n'
+            r'                return -1\ ?\n'
+            r'        return wrapper\ ?\n$')
+        self.assertUntokenizeResult(input_, pattern)
+
+
+__test__ = {"doctests": doctests, 'decistmt': decistmt}
+
 
 def test_main():
     from test import test_tokenize
@@ -1122,6 +1254,7 @@
     support.run_unittest(Test_Tokenize)
     support.run_unittest(TestDetectEncoding)
     support.run_unittest(TestTokenize)
+    support.run_unittest(TestUntokenize)
 
 if __name__ == "__main__":
     test_main()
diff -r 6d278f426417 Lib/tokenize.py
--- a/Lib/tokenize.py Fri Jul 05 18:05:29 2013 -1000
+++ b/Lib/tokenize.py Sun Jul 07 13:18:24 2013 +0200
@@ -25,6 +25,7 @@
                'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
                'Michael Foord')
 import builtins
+import itertools
 import re
 import sys
 from token import *
@@ -228,14 +229,14 @@
 
     def add_whitespace(self, start):
         row, col = start
-        assert row <= self.prev_row
+        assert row >= self.prev_row
         col_offset = col - self.prev_col
         if col_offset:
             self.tokens.append(" " * col_offset)
 
     def untokenize(self, iterable):
         for t in iterable:
-            if len(t) == 2:
+            if len(t) != 5:
                 self.compat(t, iterable)
                 break
             tok_type, token, start, end, line = t
@@ -254,14 +255,10 @@
         startline = False
         indents = []
         toks_append = self.tokens.append
-        toknum, tokval = token
-
-        if toknum in (NAME, NUMBER):
-            tokval += ' '
-        if toknum in (NEWLINE, NL):
-            startline = True
         prevstring = False
-        for tok in iterable:
+        # We still need to process the first token, which was already read.
+        for tok in itertools.chain([token], iterable):
             toknum, tokval = tok[:2]
             if toknum == ENCODING:
                 self.encoding = tokval
@@ -296,10 +293,14 @@
     """Transform tokens back into Python source code.
     It returns a bytes object, encoded using the ENCODING
     token, which is the first token sequence output by tokenize.
+    If the ENCODING token is not present, the output is encoded with 'utf-8'.
 
     Each element returned by the iterable must be a token sequence
-    with at least two elements, a token number and token value.  If
-    only two tokens are passed, the resulting output is poor.
+    with at least two elements, a token number and token value.
+    If five elements are present, it is assumed that they are formatted like
+    the output of tokenize and the additional elements are used to restore
+    the original whitespace.  If a different number of elements is present,
+    the resulting output has poor whitespace.
 
     Round-trip invariant for full input:
         Untokenized source will match input source exactly
@@ -314,9 +315,10 @@
     """
     ut = Untokenizer()
     out = ut.untokenize(iterable)
-    if ut.encoding is not None:
-        out = out.encode(ut.encoding)
-    return out
+
+    # Use utf-8 as the default encoding if no ENCODING token was provided.
+    encoding = ut.encoding if ut.encoding is not None else 'utf-8'
+    return out.encode(encoding)
 
 
 def _get_normal_name(orig_enc):
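
A quick illustration of the round-trip behaviour the patch documents. This is an informal sketch, not part of the patch itself; it only relies on the documented guarantees (full 5-tuples restore whitespace exactly, shorter tuples fall back to the compat path, and the result re-tokenizes to the same token types and strings):

    # Sketch: round-tripping source through tokenize()/untokenize().
    from io import BytesIO
    from tokenize import tokenize, untokenize

    source = b"1 + 2\n"
    tokens = list(tokenize(BytesIO(source).readline))

    # Full token tuples: original whitespace is restored exactly.
    assert untokenize(tokens) == source

    # 2-element tuples: spacing may differ, but the output tokenizes back
    # to the same (type, string) pairs, as the docs guarantee.
    compact = [(tok.type, tok.string) for tok in tokens]
    roundtrip = untokenize(compact)
    assert ([tok[:2] for tok in tokenize(BytesIO(roundtrip).readline)]
            == [tok[:2] for tok in tokens])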