Use tokenize.detect_encoding() to read the encoding of Python scripts
instead of using the default io encoding (utf-8).

See also related issue: #5093

Index: Lib/lib2to3/refactor.py
===================================================================
--- Lib/lib2to3/refactor.py	(révision 71720)
+++ Lib/lib2to3/refactor.py	(copie de travail)
@@ -20,6 +20,7 @@
 import operator
 from collections import defaultdict
 from itertools import chain
+from tokenize import detect_encoding
 
 # Local imports
 from .pgen2 import driver
@@ -213,17 +214,22 @@
             # Modify dirnames in-place to remove subdirs with leading dots
             dirnames[:] = [dn for dn in dirnames if not dn.startswith(".")]
 
+    def _read_file_content(self, filename):
+        try:
+            with open(filename, "rb") as f:
+                encoding, lines = detect_encoding(f.readline)
+        except IOError as err:
+            self.log_error("Can't read %s: %s", filename, err)
+            return None
+        with open(filename, encoding=encoding) as f:
+            return f.read()
+
     def refactor_file(self, filename, write=False, doctests_only=False):
         """Refactors a file."""
-        try:
-            f = open(filename)
-        except IOError as err:
-            self.log_error("Can't open %s: %s", filename, err)
+        input = self._read_file_content(filename)
+        if input is None:
             return
-        try:
-            input = f.read() + "\n" # Silence certain parse errors
-        finally:
-            f.close()
+        input += "\n" # Silence certain parse errors
         if doctests_only:
             self.log_debug("Refactoring doctests in %s", filename)
             output = self.refactor_docstring(input, filename)
@@ -327,15 +333,9 @@
         """
         self.files.append(filename)
         if old_text is None:
-            try:
-                f = open(filename, "r")
-            except IOError as err:
-                self.log_error("Can't read %s: %s", filename, err)
+            old_text = self._read_file_content(filename)
+            if old_text is None:
                 return
-            try:
-                old_text = f.read()
-            finally:
-                f.close()
         if old_text == new_text:
             self.log_debug("No changes to %s", filename)
             return