diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -638,7 +638,7 @@
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
-                     open as tokenize_open)
+                     TokenInfo, COMMENT, NL, open as tokenize_open)
 from io import BytesIO
 from unittest import TestCase
 import os, sys, glob
@@ -1109,6 +1109,75 @@
                           token.NAME, token.AMPER, token.NUMBER, token.RPAR)

+    def test_untokenize_half_compat(self):
+        tokens = [
+            TokenInfo(ENCODING, 'utf-8', (0, 0), (0, 0), ''),
+            TokenInfo(COMMENT, '# some test source',
+                      (0, 0), (0, 18), '# some test source\n'),
+            (NL, '\n'),
+            (COMMENT, '# more test source'),
+            (ENDMARKER, ''),
+        ]
+        expected = b"# some test source\n# more test source"
+        source = untokenize(tokens)
+
+        self.assertEqual(source, expected)
+
+    def test_untokenize_half_compat_with_iterator(self):
+        tokens = [
+            TokenInfo(ENCODING, 'utf-8', (0, 0), (0, 0), ''),
+            TokenInfo(COMMENT, '# some test source',
+                      (0, 0), (0, 18), '# some test source\n'),
+            (NL, '\n'),
+            (COMMENT, '# more test source'),
+            (ENDMARKER, ''),
+        ]
+        expected = b"# some test source\n# more test source"
+        iter_tokens = iter(tokens)
+        source = untokenize(iter_tokens)
+
+        self.assertEqual(source, expected)
+
+    def test_untokenize_encoding_with_iterator(self):
+        tokens = [
+            TokenInfo(ENCODING, 'utf-8', (0, 0), (0, 0), ''),
+            TokenInfo(ENDMARKER, '', (0, 0), (0, 0), ''),
+        ]
+        iter_tokens = iter(tokens)
+
+        expected = untokenize(tokens)
+        source = untokenize(iter_tokens)
+
+        self.assertEqual(source, expected)
+
+    def test_untokenize_encoding_compat_mode(self):
+        tokens = [
+            TokenInfo(ENCODING, 'utf-8', (0, 0), (0, 0), ''),
+            TokenInfo(ENDMARKER, '', (0, 0), (0, 0), ''),
+        ]
+        truncated = [tok[:2] for tok in tokens]
+
+        expected = untokenize(tokens)
+        source = untokenize(truncated)
+
+        self.assertEqual(source, expected)
+
+
+    def test_untokenize_encoding_compat_mode_with_iterator(self):
+        tokens = [
+            TokenInfo(ENCODING, 'utf-8', (0, 0), (0, 0), ''),
+            TokenInfo(ENDMARKER, '', (0, 0), (0, 0), ''),
+        ]
+        truncated = (tok[:2] for tok in tokens)
+        iter_tokens = iter(tokens)
+
+        expected = untokenize(iter_tokens)
+        source = untokenize(truncated)
+
+        self.assertEqual(source, expected)
+
+
 __test__ = {"doctests" : doctests, 'decistmt': decistmt}

 def test_main():
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -30,6 +30,7 @@
 from token import *
 from codecs import lookup, BOM_UTF8
 import collections
+import itertools
 from io import TextIOWrapper

 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -234,6 +235,7 @@
         self.tokens.append(" " * col_offset)

     def untokenize(self, iterable):
+        iterable = iter(iterable)
         for t in iterable:
             if len(t) == 2:
                 self.compat(t, iterable)
@@ -261,7 +263,7 @@
             if toknum in (NEWLINE, NL):
                 startline = True
             prevstring = False
-        for tok in iterable:
+        for tok in itertools.chain([token], iterable):
             toknum, tokval = tok[:2]
             if toknum == ENCODING:
                 self.encoding = tokval
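
Not part of the patch: a minimal usage sketch of the behaviour the new tests pin down, assuming a tokenize module with this change applied; all variable names below are illustrative only.

from io import BytesIO
from tokenize import tokenize, untokenize

source = b"x = 1 + 2\n"

# Full mode: tokenize() lazily yields TokenInfo 5-tuples; untokenize()
# can consume the generator directly, and the leading ENCODING token
# makes it return bytes, giving an exact round trip for this input.
tokens = tokenize(BytesIO(source).readline)
assert untokenize(tokens) == source

# Compat mode: truncating each token to (type, string) forces the
# Untokenizer.compat() path.  With the change, the leading ENCODING pair
# is recognized and the same iterator keeps being consumed, though the
# result is only token-equivalent, not byte-identical.
pairs = ((tok.type, tok.string) for tok in tokenize(BytesIO(source).readline))
print(untokenize(pairs))  # e.g. b'x =1 +2 \n'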