IDLE: detect a script's source encoding with tokenize.detect_encoding()
and use it when loading, syntax-checking and tab-checking the file.

Notes on this revision of the patch:
  * check_module_event() looks up the encoding only after the filename has
    been obtained and validated (it was previously used before assignment).
  * loadfile() reads the text with f.read(); joining readlines() output with
    '\n' doubled every line break.
  * loadfile() still reports IOError through the "I/O Error" dialog, and
    treats an unknown codec name (LookupError) as a decoding failure.

Index: Lib/idlelib/ScriptBinding.py
===================================================================
--- Lib/idlelib/ScriptBinding.py	(révision 66750)
+++ Lib/idlelib/ScriptBinding.py	(copie de travail)
@@ -24,6 +24,7 @@
 import tokenize
 import tkinter.messagebox as tkMessageBox
 from idlelib.EditorWindow import EditorWindow
+from idlelib.IOBinding import detect_encoding
 from idlelib import PyShell
 
 from idlelib.configHandler import idleConf
@@ -53,16 +54,17 @@
         self.root = self.editwin.root
 
     def check_module_event(self, event):
         filename = self.getfilename()
         if not filename:
             return 'break'
-        if not self.checksyntax(filename):
+        encoding = detect_encoding(filename)
+        if not self.checksyntax(filename, encoding):
             return 'break'
-        if not self.tabnanny(filename):
+        if not self.tabnanny(filename, encoding):
             return 'break'
 
-    def tabnanny(self, filename):
-        f = open(filename, 'r')
+    def tabnanny(self, filename, encoding):
+        f = open(filename, 'r', encoding=encoding)
         try:
             tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
         except tokenize.TokenError as msg:
@@ -78,11 +80,11 @@
             return False
         return True
 
-    def checksyntax(self, filename):
+    def checksyntax(self, filename, encoding):
         self.shell = shell = self.flist.open_shell()
         saved_stream = shell.get_warning_stream()
         shell.set_warning_stream(shell.stderr)
-        f = open(filename, 'r')
+        f = open(filename, 'r', encoding=encoding)
         source = f.read()
         f.close()
         if '\r' in source:
@@ -121,10 +123,11 @@
         filename = self.getfilename()
         if not filename:
             return 'break'
-        code = self.checksyntax(filename)
+        encoding = detect_encoding(filename)
+        code = self.checksyntax(filename, encoding)
         if not code:
             return 'break'
-        if not self.tabnanny(filename):
+        if not self.tabnanny(filename, encoding):
             return 'break'
         shell = self.shell
         interp = shell.interp
Index: Lib/idlelib/IOBinding.py
===================================================================
--- Lib/idlelib/IOBinding.py	(révision 66750)
+++ Lib/idlelib/IOBinding.py	(copie de travail)
@@ -8,11 +8,10 @@
 import re
 from tkinter import *
 from tkinter.simpledialog import SimpleDialog
+import tokenize
 
 from idlelib.configHandler import idleConf
 
-from codecs import BOM_UTF8
-
 # Try setting the locale, so that we can find out
 # what encoding to use
 try:
@@ -62,7 +61,14 @@
 encoding = locale_encoding  ### KBK 07Sep07  This is used all over IDLE, check!
                             ### 'encoding' is used below in encode(), check!
 
-coding_re = re.compile("coding[:=]\s*([-\w_.]+)")
+def detect_encoding(filename):
+    """
+    Detect the encoding of a Python script: look for a UTF-8 BOM or a
+    "coding:" declaration on line 1 or 2 (see tokenize.detect_encoding).
+    """
+    with open(filename, 'rb') as f:
+        encoding, lines = tokenize.detect_encoding(f.readline)
+    return encoding
 
 class EncodingMessage(SimpleDialog):
     "Inform user that an encoding declaration is needed."
@@ -110,42 +116,7 @@
     def do_edit(self):
         self.done(1)
 
-def coding_spec(data):
-    """Return the encoding declaration according to PEP 263.
 
-    When checking encoded data, only the first two lines should be passed
-    in to avoid a UnicodeDecodeError if the rest of the data is not unicode.
-    The first two lines would contain the encoding specification.
-
-    Raise a LookupError if the encoding is declared but unknown.
-    """
-    if isinstance(data, bytes):
-        try:
-            lines = data.decode('utf-8')
-        except UnicodeDecodeError:
-            return None
-    else:
-        lines = data
-    # consider only the first two lines
-    if '\n' in lines:
-        lst = lines.split('\n')[:2]
-    elif '\r' in lines:
-        lst = lines.split('\r')[:2]
-    else:
-        lst = list(lines)
-    str = '\n'.join(lst)
-    match = coding_re.search(str)
-    if not match:
-        return None
-    name = match.group(1)
-    try:
-        codecs.lookup(name)
-    except LookupError:
-        # The standard encoding error does not indicate the encoding
-        raise LookupError("Unknown encoding: "+name)
-    return name
-
-
 class IOBinding:
 
     def __init__(self, editwin):
@@ -240,36 +211,24 @@
         self.text.focus_set()
         return "break"
 
-    eol = r"(\r\n)|\n|\r"  # \r\n (Windows), \n (UNIX), or \r (Mac)
-    eol_re = re.compile(eol)
     eol_convention = os.linesep  # default
 
     def loadfile(self, filename):
         try:
-            # open the file in binary mode so that we can handle
-            # end-of-line convention ourselves.
-            f = open(filename,'rb')
-            two_lines = f.readline() + f.readline()
-            f.seek(0)
-            bytes = f.read()
-            f.close()
+            encoding = detect_encoding(filename)
+            with open(filename, 'r', encoding=encoding) as f:
+                chars = f.read()
         except IOError as msg:
             tkMessageBox.showerror("I/O Error", str(msg), master=self.text)
             return False
-        chars = self._decode(two_lines, bytes)
-        if chars is None:
+        except (UnicodeDecodeError, SyntaxError, LookupError):
             tkMessageBox.showerror("Decoding Error",
                                    "File %s\nFailed to Decode" % filename,
                                    parent=self.text)
             return False
-        # We now convert all end-of-lines to '\n's
-        firsteol = self.eol_re.search(chars)
-        if firsteol:
-            self.eol_convention = firsteol.group(0)
-            chars = self.eol_re.sub(r"\n", chars)
         self.text.delete("1.0", "end")
         self.set_filename(None)
         self.text.insert("1.0", chars)
         self.reset_undo()
         self.set_filename(filename)
         self.text.mark_set("insert", "1.0")
@@ -277,62 +236,6 @@
         self.updaterecentfileslist(filename)
         return True
 
-    def _decode(self, two_lines, bytes):
-        "Create a Unicode string."
-        chars = None
-        # Check presence of a UTF-8 signature first
-        if bytes.startswith(BOM_UTF8):
-            try:
-                chars = bytes[3:].decode("utf-8")
-            except UnicodeDecodeError:
-                # has UTF-8 signature, but fails to decode...
-                return None
-            else:
-                # Indicates that this file originally had a BOM
-                self.fileencoding = 'BOM'
-                return chars
-        # Next look for coding specification
-        try:
-            enc = coding_spec(two_lines)
-        except LookupError as name:
-            tkMessageBox.showerror(
-                title="Error loading the file",
-                message="The encoding '%s' is not known to this Python "\
-                "installation. The file may not display correctly" % name,
-                master = self.text)
-            enc = None
-        except UnicodeDecodeError:
-            return None
-        if enc:
-            try:
-                chars = str(bytes, enc)
-                self.fileencoding = enc
-                return chars
-            except UnicodeDecodeError:
-                pass
-        # Try ascii:
-        try:
-            chars = str(bytes, 'ascii')
-            self.fileencoding = None
-            return chars
-        except UnicodeDecodeError:
-            pass
-        # Try utf-8:
-        try:
-            chars = str(bytes, 'utf-8')
-            self.fileencoding = 'utf-8'
-            return chars
-        except UnicodeDecodeError:
-            pass
-        # Finally, try the locale's encoding. This is deprecated;
-        # the user should declare a non-ASCII encoding
-        try:
-            chars = str(bytes, locale_encoding)
-            self.fileencoding = locale_encoding
-        except UnicodeDecodeError:
-            pass
-        return chars # None on failure
-
     def maybesave(self):
         if self.get_saved():
             return "yes"