Index: Lib/tokenize.py
===================================================================
--- Lib/tokenize.py	(revision 78638)
+++ Lib/tokenize.py	(working copy)
@@ -307,13 +307,23 @@
 
     If no encoding is specified, then the default of 'utf-8' will be returned.
     """
+    def _universal_newline(readline):
+        while True:
+            try:
+                raw_line = readline()
+            except StopIteration:
+                break
+            if raw_line == b'':
+                break
+            for line in raw_line.splitlines(True):
+                yield line
+        while True:
+            yield b''
+
+    universal_newline = _universal_newline(readline)
+
     bom_found = False
     encoding = None
-    def read_or_stop():
-        try:
-            return readline()
-        except StopIteration:
-            return b''
 
     def find_cookie(line):
         try:
@@ -336,7 +346,7 @@
                 raise SyntaxError('encoding problem: utf-8')
         return encoding
 
-    first = read_or_stop()
+    first = next(universal_newline)
     if first.startswith(BOM_UTF8):
         bom_found = True
         first = first[3:]
@@ -347,7 +357,7 @@
         if encoding:
             return encoding, [first]
 
-    second = read_or_stop()
+    second = next(universal_newline)
     if not second:
         return 'utf-8', [first]
 
Index: Lib/test/test_tokenize.py
===================================================================
--- Lib/test/test_tokenize.py	(revision 78638)
+++ Lib/test/test_tokenize.py	(working copy)
@@ -844,6 +844,17 @@
         readline = self.get_readline((b'# coding: bad\n',))
         self.assertRaises(SyntaxError, detect_encoding, readline)
 
+    def test_mac_newlines(self):
+        lines = (
+            # Mac-style newline
+            b'# coding: ISO-8859-1\r',
+            # Any non-ASCII line of text
+            b'print("Bonjour ma ch\xe8re amie")',
+        )
+        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        self.assertEqual(encoding, 'iso-8859-1')
+        self.assertEqual(consumed_lines, [b'# coding: ISO-8859-1\r'])
+
 class TestTokenize(TestCase):
 
     def test_tokenize(self):
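For reviewers, here is a minimal standalone sketch (not part of the patch, using io.BytesIO as a stand-in for a real readline) showing why the re-split is needed. A plain readline() only splits on b'\n', so a coding cookie terminated by a lone Mac-style b'\r' reaches find_cookie() glued to the following source line; bytes.splitlines() honours all newline conventions and separates them:

    import io

    def _universal_newline(readline):
        # Re-split whatever readline() returns on any newline convention
        # (\n, \r\n, or a lone \r), keeping the line endings.
        while True:
            try:
                raw_line = readline()
            except StopIteration:
                break
            if raw_line == b'':
                break
            for line in raw_line.splitlines(True):
                yield line
        # Like the read_or_stop() helper it replaces, keep yielding b''
        # once the input is exhausted.
        while True:
            yield b''

    # BytesIO.readline() splits on b'\n' only, so the \r-terminated cookie
    # and the following source come back as a single chunk ...
    source = b'# coding: ISO-8859-1\rprint("Bonjour ma ch\xe8re amie")'
    lines = _universal_newline(io.BytesIO(source).readline)
    print(next(lines))  # b'# coding: ISO-8859-1\r'  -- the cookie is its own line
    print(next(lines))  # b'print("Bonjour ma ch\xe8re amie")'
    print(next(lines))  # b''

Yielding b'' forever after exhaustion preserves the contract of the read_or_stop() helper the patch removes, so detect_encoding() can keep calling next(universal_newline) unconditionally.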