diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -638,7 +638,7 @@
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
-                     open as tokenize_open)
+                     TokenInfo, open as tokenize_open)
 from io import BytesIO
 from unittest import TestCase
 import os, sys, glob
@@ -1109,6 +1109,45 @@
                                   token.NAME, token.AMPER, token.NUMBER,
                                   token.RPAR)
 
+    def test_untokenize_with_iterator(self):
+        tokens = [
+            TokenInfo(ENCODING, 'utf-8', (0, 0), (0, 0), ''),
+            TokenInfo(ENDMARKER, '', (0, 0), (0, 0), ''),
+        ]
+        iter_tokens = iter(tokens)
+
+        expected = untokenize(tokens)
+        source = untokenize(iter_tokens)
+
+        self.assertEqual(source, expected)
+
+    def test_untokenize_compat_mode(self):
+        tokens = [
+            TokenInfo(ENCODING, 'utf-8', (0, 0), (0, 0), ''),
+            TokenInfo(ENDMARKER, '', (0, 0), (0, 0), ''),
+        ]
+        truncated = [tok[:2] for tok in tokens]
+
+        expected = untokenize(tokens)
+        source = untokenize(truncated)
+
+        self.assertEqual(source, expected)
+
+
+    def test_untokenize_compat_mode_with_iterator(self):
+        tokens = [
+            TokenInfo(ENCODING, 'utf-8', (0, 0), (0, 0), ''),
+            TokenInfo(ENDMARKER, '', (0, 0), (0, 0), ''),
+        ]
+        truncated = (tok[:2] for tok in tokens)
+        iter_tokens = iter(tokens)
+
+        expected = untokenize(iter_tokens)
+        source = untokenize(truncated)
+
+        self.assertEqual(source, expected)
+
+
 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
 
 def test_main():
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -258,8 +258,10 @@
 
         if toknum in (NAME, NUMBER):
             tokval += ' '
-        if toknum in (NEWLINE, NL):
+        elif toknum in (NEWLINE, NL):
+            startline = True
+        elif toknum == ENCODING:
+            self.encoding = tokval
-            startline = True
         prevstring = False
         for tok in iterable:
             toknum, tokval = tok[:2]
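
Not part of the patch: a small self-contained sketch of the behaviour the new tests appear to cover, using an assumed toy source string (b"1 + 2\n"). It is only illustrative and assumes an interpreter with this tokenize.py change applied; with the leading ENCODING token now recorded in compatibility mode, untokenize() encodes its output to bytes whether it receives full TokenInfo tuples, an iterator over them, or truncated (type, string) pairs.

# Illustration only, not part of the patch; assumes the patched tokenize.py.
from io import BytesIO
from tokenize import tokenize, untokenize

tokens = list(tokenize(BytesIO(b"1 + 2\n").readline))

# Full five-tuple mode: a list and an iterator over it give the same result.
assert untokenize(tokens) == untokenize(iter(tokens))

# Compatibility mode: only (type, string) pairs are supplied. A generator
# should behave like a list, and the leading ENCODING token ('utf-8') now
# causes the output to be encoded to bytes in both cases.
compat_list = untokenize([tok[:2] for tok in tokens])
compat_gen = untokenize(tok[:2] for tok in tokens)
assert compat_list == compat_gen
assert isinstance(compat_list, bytes)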