# Mercurial patch against revision 628bd1ebfa22.
# NOTE(review): the line breaks of this copy appear to have been collapsed,
# so several hunks are run together on each physical line below; the patch
# text itself is left untouched.
#
# Summary of the changes contained in this patch:
#
#   * Lib/idlelib/IOBinding.py (coding_re),
#     Lib/lib2to3/pgen2/tokenize.py (cookie_re),
#     Lib/test/test_importlib/source/test_source_encoding.py (CODING_RE),
#     Lib/tokenize.py (cookie_re) and
#     Tools/scripts/findnocoding.py (decl_re):
#     the source-encoding declaration regex changes its "#.*" prefix from
#     greedy to non-greedy ("#.*?").  When one comment line carries more
#     than one encoding declaration, the captured group is therefore the
#     first declaration on the line instead of the last one.
#
#   * Parser/tokenizer.c, get_coding_spec(): a "break" is added directly
#     after "*spec = r;".
#     NOTE(review): presumably this stops scanning at the first declaration
#     found, mirroring the regex change; the enclosing loop is not visible
#     in the hunk, so confirm against the full function.
#
#   * Lib/test/test_source_encoding.py: additionally imports script_helper,
#     captured_stdout and tempfile, and adds AbstractSourceEncodingTest with
#     six cases (declaration on the first line, on the second line, two
#     declarations on one line, declarations on two consecutive lines, and a
#     declaration line containing a non-UTF-8 byte on the first or second
#     line).  Every case prints ascii() of the literal bytes C3 A4 and
#     expects the ISO-8859-15 reading ('\xc3\u20ac').  The cases are run
#     twice: through exec() on the bytes (StringSourceEncodingTest) and
#     through a temporary script file executed with
#     script_helper.assert_python_ok (FileSourceEncodingTest).
diff -r 628bd1ebfa22 Lib/idlelib/IOBinding.py --- a/Lib/idlelib/IOBinding.py Fri Mar 18 03:03:10 2016 +0000 +++ b/Lib/idlelib/IOBinding.py Thu Mar 17 13:49:41 2016 +0200 @@ -62,7 +62,7 @@ locale_encoding = locale_encoding.lower( encoding = locale_encoding ### KBK 07Sep07 This is used all over IDLE, check! ### 'encoding' is used below in encode(), check! -coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) +coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) def coding_spec(data): diff -r 628bd1ebfa22 Lib/lib2to3/pgen2/tokenize.py --- a/Lib/lib2to3/pgen2/tokenize.py Fri Mar 18 03:03:10 2016 +0000 +++ b/Lib/lib2to3/pgen2/tokenize.py Thu Mar 17 13:49:41 2016 +0200 @@ -236,7 +236,7 @@ class Untokenizer: startline = False toks_append(tokval) -cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) def _get_normal_name(orig_enc): diff -r 628bd1ebfa22 Lib/test/test_importlib/source/test_source_encoding.py --- a/Lib/test/test_importlib/source/test_source_encoding.py Fri Mar 18 03:03:10 2016 +0000 +++ b/Lib/test/test_importlib/source/test_source_encoding.py Thu Mar 17 13:49:41 2016 +0200 @@ -14,7 +14,7 @@ import unittest import warnings -CODING_RE = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) +CODING_RE = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) class EncodingTest: diff -r 628bd1ebfa22 Lib/test/test_source_encoding.py --- a/Lib/test/test_source_encoding.py Fri Mar 18 03:03:10 2016 +0000 +++ b/Lib/test/test_source_encoding.py Thu Mar 17 13:49:41 2016 +0200 @@ -1,11 +1,12 @@ # -*- coding: koi8-r -*- import unittest -from test.support import TESTFN, unlink, unload, rmtree +from test.support import TESTFN, unlink, unload, rmtree, script_helper, captured_stdout import importlib 
import os import sys import subprocess +import tempfile class SourceEncodingTest(unittest.TestCase): @@ -142,5 +143,64 @@ class SourceEncodingTest(unittest.TestCa msg=c.exception.args[0]) +class AbstractSourceEncodingTest: + + def test_first_coding_line(self): + src = (b'#coding:iso8859-15\n' + b'print(ascii("\xc3\xa4"))\n') + out = self.run_script(src) + self.assertEqual(out.rstrip(), br"'\xc3\u20ac'") + + def test_second_coding_line(self): + src = (b'#\n' + b'#coding:iso8859-15\n' + b'print(ascii("\xc3\xa4"))\n') + out = self.run_script(src) + self.assertEqual(out.rstrip(), br"'\xc3\u20ac'") + + def test_double_coding_same_line(self): + src = (b'#coding:iso8859-15 coding:latin1\n' + b'print(ascii("\xc3\xa4"))\n') + out = self.run_script(src) + self.assertEqual(out.rstrip(), br"'\xc3\u20ac'") + + def test_double_coding_line(self): + src = (b'#coding:iso8859-15\n' + b'#coding:latin1\n' + b'print(ascii("\xc3\xa4"))\n') + out = self.run_script(src) + self.assertEqual(out.rstrip(), br"'\xc3\u20ac'") + + def test_first_non_utf8_coding_line(self): + src = (b'#coding:iso-8859-15 \xa4\n' + b'print(ascii("\xc3\xa4"))\n') + out = self.run_script(src) + self.assertEqual(out.rstrip(), br"'\xc3\u20ac'") + + def test_second_non_utf8_coding_line(self): + src = (b'\n' + b'#coding:iso-8859-15 \xa4\n' + b'print(ascii("\xc3\xa4"))\n') + out = self.run_script(src) + self.assertEqual(out.rstrip(), br"'\xc3\u20ac'") + +class StringSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase): + + def run_script(self, src): + with captured_stdout() as stdout: + exec(src) + return stdout.getvalue().encode() + +class FileSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase): + + def run_script(self, src): + with tempfile.TemporaryDirectory() as tmpd: + fn = os.path.join(tmpd, "test.py") + with open(fn, "wb") as fp: + fp.write(src) + res = script_helper.assert_python_ok(fn) + return res.out + + if __name__ == "__main__": unittest.main() diff -r 628bd1ebfa22 Lib/tokenize.py 
--- a/Lib/tokenize.py Fri Mar 18 03:03:10 2016 +0000 +++ b/Lib/tokenize.py Thu Mar 17 13:49:41 2016 +0200 @@ -34,7 +34,7 @@ import re import sys from token import * -cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) import token diff -r 628bd1ebfa22 Parser/tokenizer.c --- a/Parser/tokenizer.c Fri Mar 18 03:03:10 2016 +0000 +++ b/Parser/tokenizer.c Thu Mar 17 13:49:41 2016 +0200 @@ -275,6 +275,7 @@ get_coding_spec(const char *s, char **sp return 0; } *spec = r; + break; } } } diff -r 628bd1ebfa22 Tools/scripts/findnocoding.py --- a/Tools/scripts/findnocoding.py Fri Mar 18 03:03:10 2016 +0000 +++ b/Tools/scripts/findnocoding.py Thu Mar 17 13:49:41 2016 +0200 @@ -32,7 +32,7 @@ except ImportError: "no sophisticated Python source file search will be done.", file=sys.stderr) -decl_re = re.compile(rb'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') +decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)') blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)') def get_declaration(line):