Index: Doc/library/tokenize.rst
===================================================================
--- Doc/library/tokenize.rst	(revision 86167)
+++ Doc/library/tokenize.rst	(working copy)
@@ -101,16 +101,18 @@
    If no encoding is specified, then the default of ``'utf-8'`` will be
    returned.
 
-   :func:`detect_encoding` is useful for robustly reading Python source files.
-   A common pattern for this follows::
+   Use :func:`open_python` to open a Python script: it uses
+   :func:`detect_encoding` to detect the file encoding.
 
-       def read_python_source(file_name):
-           with open(file_name, "rb") as fp:
-               encoding = tokenize.detect_encoding(fp.readline)[0]
-           with open(file_name, "r", encoding=encoding) as fp:
-               return fp.read()
 
+.. function:: open_python(filename)
+
+   Open a Python script in read mode using the encoding detected by
+   :func:`detect_encoding`.
+
+   .. versionadded:: 3.2
 
+
 Example of a script rewriter that transforms float literals into Decimal
 objects::
 
@@ -153,4 +155,3 @@
 
                 result.append((toknum, tokval))
         return untokenize(result).decode('utf-8')
-
Index: Lib/py_compile.py
===================================================================
--- Lib/py_compile.py	(revision 86167)
+++ Lib/py_compile.py	(working copy)
@@ -104,9 +104,7 @@
     byte-compile all installed files (or all files in selected
     directories).
     """
-    with open(file, "rb") as f:
-        encoding = tokenize.detect_encoding(f.readline)[0]
-    with open(file, encoding=encoding) as f:
+    with tokenize.open_python(file) as f:
         try:
             timestamp = int(os.fstat(f.fileno()).st_mtime)
         except AttributeError:
Index: Lib/tabnanny.py
===================================================================
--- Lib/tabnanny.py	(revision 86167)
+++ Lib/tabnanny.py	(working copy)
@@ -93,11 +93,8 @@
             check(fullname)
         return
 
-    with open(file, 'rb') as f:
-        encoding, lines = tokenize.detect_encoding(f.readline)
-
     try:
-        f = open(file, encoding=encoding)
+        f = tokenize.open_python(file)
     except IOError as msg:
         errprint("%r: I/O Error: %s" % (file, msg))
         return
Index: Lib/tokenize.py
===================================================================
--- Lib/tokenize.py	(revision 86167)
+++ Lib/tokenize.py	(working copy)
@@ -29,6 +29,7 @@
 from token import *
 from codecs import lookup, BOM_UTF8
 import collections
+from io import TextIOWrapper
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
@@ -335,6 +336,18 @@
     return default, [first, second]
 
 
+def open_python(filename):
+    """
+    Open a Python script in read mode with the right encoding.
+    """
+    buffer = open(filename, 'rb')
+    encoding, line = detect_encoding(buffer.readline)
+    buffer.seek(0)
+    text = TextIOWrapper(buffer, encoding, line_buffering=True)
+    text.mode = 'r'
+    return text
+
+
 def tokenize(readline):
     """
     The tokenize() generator requires one argment, readline, which
Index: Lib/trace.py
===================================================================
--- Lib/trace.py	(revision 86167)
+++ Lib/trace.py	(working copy)
@@ -419,10 +419,9 @@
 def find_executable_linenos(filename):
     """Return dict where keys are line numbers in the line number table."""
     try:
-        with io.FileIO(filename, 'r') as file:
-            encoding, lines = tokenize.detect_encoding(file.readline)
-        with open(filename, "r", encoding=encoding) as f:
+        with tokenize.open_python(filename) as f:
             prog = f.read()
+            encoding = f.encoding
     except IOError as err:
         print(("Not printing coverage data for %r: %s"
               % (filename, err)), file=sys.stderr)
Index: Lib/linecache.py
===================================================================
--- Lib/linecache.py	(revision 86167)
+++ Lib/linecache.py	(working copy)
@@ -123,9 +123,7 @@
         else:
             return []
     try:
-        with open(fullname, 'rb') as fp:
-            coding, line = tokenize.detect_encoding(fp.readline)
-        with open(fullname, 'r', encoding=coding) as fp:
+        with tokenize.open_python(fullname) as fp:
             lines = fp.readlines()
     except IOError:
         return []
Index: Lib/test/test_tokenize.py
===================================================================
--- Lib/test/test_tokenize.py	(revision 86167)
+++ Lib/test/test_tokenize.py	(working copy)
@@ -564,7 +564,7 @@
 
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
-                     STRING, ENDMARKER, tok_name, detect_encoding)
+                     STRING, ENDMARKER, tok_name, detect_encoding, open_python)
 from io import BytesIO
 from unittest import TestCase
 import os, sys, glob
@@ -857,6 +857,19 @@
         readline = self.get_readline((b'# coding: bad\n',))
         self.assertRaises(SyntaxError, detect_encoding, readline)
 
+    def test_open(self):
+        filename = support.TESTFN + '.py'
+        try:
+            for encoding in ('iso-8859-15', 'utf-8'):
+                with open(filename, 'w', encoding=encoding) as fp:
+                    print("# coding: %s" % encoding, file=fp)
+                    print("print('euro:\u20ac')", file=fp)
+                with open_python(filename) as fp:
+                    assert fp.encoding == encoding
+                    assert fp.mode == 'r'
+        finally:
+            support.unlink(filename)
+
 class TestTokenize(TestCase):
 
     def test_tokenize(self):