# HG changeset patch
# User Zbigniew Jędrzejewski-Szmek
# Date 1289734217 -3600
# Branch py3k
# Node ID fbc9f31c83bc7ad28f8f4ea9a17661c07db82b18
# Parent 1a821081b470e60672becf2ad6ee387c733da076
Make 2to3 behave better with non-decodable files

1. Try to run the fixer only on files ending with '.py', not just 'py'.
2. In case decoding fails, don't provide the full traceback, but just
   the name of the failing file and the error message.

The test for file names to fix is the same as the test used to find
fixers.

Example output for failing files:
...
RefactoringTool: Decoding failed in ./BAD.py: 'utf8' codec can't decode byte 0x93 in position 0: invalid start byte

diff -r 1a821081b470 -r fbc9f31c83bc Lib/lib2to3/refactor.py
--- a/Lib/lib2to3/refactor.py	Sat Nov 13 14:37:49 2010 +0100
+++ b/Lib/lib2to3/refactor.py	Sun Nov 14 12:30:17 2010 +0100
@@ -307,8 +307,7 @@
             dirnames.sort()
             filenames.sort()
             for name in filenames:
-                if not name.startswith(".") and \
-                        os.path.splitext(name)[1].endswith("py"):
+                if not name.startswith(".") and name.endswith(".py"):
                     fullname = os.path.join(dirpath, name)
                     self.refactor_file(fullname, write, doctests_only)
             # Modify dirnames in-place to remove subdirs with leading dots
@@ -323,12 +322,15 @@
         except IOError as err:
             self.log_error("Can't open %s: %s", filename, err)
             return None, None
-        try:
+        with f:
             encoding = tokenize.detect_encoding(f.readline)[0]
-        finally:
-            f.close()
         with _open_with_encoding(filename, "r", encoding=encoding) as f:
-            return _from_system_newlines(f.read()), encoding
+            try:
+                data = f.read()
+            except UnicodeDecodeError as err:
+                self.log_error("Decoding failed in %s: %s", filename, err)
+                return None, None
+            return _from_system_newlines(data), encoding
 
     def refactor_file(self, filename, write=False, doctests_only=False):
         """Refactors a file."""