Index: Lib/idlelib/ScriptBinding.py
===================================================================
--- Lib/idlelib/ScriptBinding.py	(révision 66703)
+++ Lib/idlelib/ScriptBinding.py	(copie de travail)
@@ -38,6 +38,32 @@
 by Format->Untabify Region and specify the number of columns used by each tab.
 """
 
+# See also PyTokenizer_FindEncoding() and get_coding_spec()
+# in Parser/tokenizer.c
+def detect_encoding(filename, default_encoding='ASCII'):
+    """
+    Detect the source encoding of a Python script.
+
+    Look for a PEP 263 "# coding: <name>" declaration on the first two
+    physical lines of *filename* and return the declared encoding name.
+    Return *default_encoding* when no declaration is found.
+    """
+    # Match "coding: utf8", "coding:utf8" or "coding=ISO-8859-1"; the
+    # whitespace after the separator is optional.
+    coding_regex = re.compile(r'[ \t\f]*#.*?coding[:=][ \t]*([-_.a-z0-9]+)',
+                              re.IGNORECASE)
+    with open(filename, 'rb') as f:
+        data = f.read(4096)
+    # splitlines() keeps empty lines, so a blank first line still counts
+    # as line 1 and only the first two physical lines are examined.
+    for line in data.splitlines()[:2]:
+        # Bytes that are not ASCII cannot be part of the declaration, but
+        # they must not abort the detection either.
+        match = coding_regex.match(str(line, 'ASCII', 'replace'))
+        if match:
+            return match.group(1)
+    return default_encoding
+
 class ScriptBinding:
 
     menudefs = [
@@ -53,16 +79,17 @@
         self.root = self.editwin.root
 
     def check_module_event(self, event):
         filename = self.getfilename()
         if not filename:
             return 'break'
-        if not self.checksyntax(filename):
+        encoding = detect_encoding(filename)
+        if not self.checksyntax(filename, encoding):
             return 'break'
-        if not self.tabnanny(filename):
+        if not self.tabnanny(filename, encoding):
             return 'break'
 
-    def tabnanny(self, filename):
-        f = open(filename, 'r')
+    def tabnanny(self, filename, encoding):
+        f = open(filename, 'r', encoding=encoding)
         try:
             tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
         except tokenize.TokenError as msg:
@@ -78,11 +105,11 @@
             return False
         return True
 
-    def checksyntax(self, filename):
+    def checksyntax(self, filename, encoding):
         self.shell = shell = self.flist.open_shell()
         saved_stream = shell.get_warning_stream()
         shell.set_warning_stream(shell.stderr)
-        f = open(filename, 'r')
+        f = open(filename, 'r', encoding=encoding)
         source = f.read()
         f.close()
         if '\r' in source:
@@ -121,10 +148,11 @@
         filename = self.getfilename()
         if not filename:
             return 'break'
-        code = self.checksyntax(filename)
+        encoding = detect_encoding(filename)
+        code = self.checksyntax(filename, encoding)
         if not code:
             return 'break'
-        if not self.tabnanny(filename):
+        if not self.tabnanny(filename, encoding):
             return 'break'
         shell = self.shell
         interp = shell.interp