diff -r 3877bf2e3235 Doc/library/fileinput.rst --- a/Doc/library/fileinput.rst Mon Mar 12 09:46:44 2012 -0700 +++ b/Doc/library/fileinput.rst Tue Mar 13 00:35:31 2012 -0700 @@ -47,13 +47,14 @@ You can control how files are opened by providing an opening hook via the *openhook* parameter to :func:`fileinput.input` or :class:`FileInput()`. The hook must be a function that takes two arguments, *filename* and *mode*, and -returns an accordingly opened file-like object. Two useful hooks are already -provided by this module. +returns an accordingly opened file-like object. If *encoding* is specified, +it will be passed to the hook as an additional argument. Two useful hooks are +already provided by this module. The following function is the primary interface of this module: -.. function:: input(files=None, inplace=False, backup='', bufsize=0, mode='r', openhook=None) +.. function:: input(files=None, inplace=False, backup='', bufsize=0, mode='r', openhook=None, encoding=None) Create an instance of the :class:`FileInput` class. The instance will be used as global state for the functions of this module, and is also returned to use @@ -132,7 +133,7 @@ available for subclassing as well: -.. class:: FileInput(files=None, inplace=False, backup='', bufsize=0, mode='r', openhook=None) +.. class:: FileInput(files=None, inplace=False, backup='', bufsize=0, mode='r', openhook=None, encoding=None) Class :class:`FileInput` is the implementation; its methods :meth:`filename`, :meth:`fileno`, :meth:`lineno`, :meth:`filelineno`, :meth:`isfirstline`, @@ -149,6 +150,10 @@ *filename* and *mode*, and returns an accordingly opened file-like object. You cannot use *inplace* and *openhook* together. + If the *encoding* is None, FileInput will use the encoding defined in + ``locale.getpreferredencoding``. If specified, it will be used to open + all files. + A :class:`FileInput` instance can be used as a context manager in the :keyword:`with` statement. 
In this example, *input* is closed after the :keyword:`with` statement is exited, even if an exception occurs:: @@ -159,6 +164,9 @@ .. versionchanged:: 3.2 Can be used as a context manager. + .. versionchanged:: 3.3 + Added *encoding* parameter to FileInput. + **Optional in-place filtering:** if the keyword argument ``inplace=True`` is passed to :func:`fileinput.input` or to the :class:`FileInput` constructor, the @@ -174,13 +182,17 @@ The two following opening hooks are provided by this module: -.. function:: hook_compressed(filename, mode) +.. function:: hook_compressed(filename, mode, encoding=None) Transparently opens files compressed with gzip and bzip2 (recognized by the extensions ``'.gz'`` and ``'.bz2'``) using the :mod:`gzip` and :mod:`bz2` modules. If the filename extension is not ``'.gz'`` or ``'.bz2'``, the file is opened normally (ie, using :func:`open` without any decompression). + The *encoding* value is passed to :class:`io.TextIOWrapper` for + compressed files and to :func:`open` for normal files. If None is provided, + ``locale.getpreferredencoding`` will be used. 
+ Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)`` diff -r 3877bf2e3235 Lib/fileinput.py --- a/Lib/fileinput.py Mon Mar 12 09:46:44 2012 -0700 +++ b/Lib/fileinput.py Tue Mar 13 00:35:31 2012 -0700 @@ -78,8 +78,9 @@ - read(), read(size), even readlines() """ +import io +import sys, os -import sys, os __all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno", "isfirstline", "isstdin", "FileInput"] @@ -89,7 +90,7 @@ DEFAULT_BUFSIZE = 8*1024 def input(files=None, inplace=False, backup="", bufsize=0, - mode="r", openhook=None): + mode="r", openhook=None, encoding=None): """input(files=None, inplace=False, backup="", bufsize=0, \ mode="r", openhook=None) @@ -101,7 +102,7 @@ global _state if _state and _state._file: raise RuntimeError("input() already active") - _state = FileInput(files, inplace, backup, bufsize, mode, openhook) + _state = FileInput(files, inplace, backup, bufsize, mode, openhook, encoding) return _state def close(): @@ -196,7 +197,7 @@ """ def __init__(self, files=None, inplace=False, backup="", bufsize=0, - mode="r", openhook=None): + mode="r", openhook=None, encoding=None): if isinstance(files, str): files = (files,) else: @@ -220,6 +221,7 @@ self._backupfilename = None self._buffer = [] self._bufindex = 0 + self._encoding = encoding # restrict mode argument to reading modes if mode not in ('r', 'rU', 'U', 'rb'): raise ValueError("FileInput opening mode must be one of " @@ -350,7 +352,16 @@ else: # This may raise IOError if self._openhook: - self._file = self._openhook(self._filename, self._mode) + # Custom hooks made previous to Python3.3 didn't have + # encoding argument + if self._encoding is None: + self._file = self._openhook(self._filename, + self._mode) + else: + self._file = self._openhook( + self._filename, self._mode, encoding=self._encoding) + + else: self._file = open(self._filename, self._mode) self._buffer = self._file.readlines(self._bufsize) @@ -385,16 +396,17 @@ return self._isstdin -def 
hook_compressed(filename, mode): +def hook_compressed(filename, mode, encoding=None): ext = os.path.splitext(filename)[1] if ext == '.gz': import gzip - return gzip.open(filename, mode) + stream = gzip.open(filename, mode) elif ext == '.bz2': import bz2 - return bz2.BZ2File(filename, mode) + stream = bz2.BZ2File(filename, mode) else: - return open(filename, mode) + return open(filename, mode, encoding=encoding) + return io.TextIOWrapper(stream, encoding) def hook_encoded(encoding): diff -r 3877bf2e3235 Lib/test/test_fileinput.py --- a/Lib/test/test_fileinput.py Mon Mar 12 09:46:44 2012 -0700 +++ b/Lib/test/test_fileinput.py Tue Mar 13 00:35:31 2012 -0700 @@ -2,6 +2,7 @@ Tests for fileinput module. Nick Mathewson ''' +import io import os import sys import re @@ -247,7 +248,7 @@ class CustomOpenHook: def __init__(self): self.invoked = False - def __call__(self, *args): + def __call__(self, *args, **kargs): self.invoked = True return open(*args) @@ -258,6 +259,14 @@ fi.readline() self.assertTrue(custom_open_hook.invoked, "openhook not invoked") + def test_file_hook_backward_compatibility(self): + def old_hook(filename, mode): + return io.StringIO("I used to receive only filename and mode") + t = writeTmp(1, ["\n"]) + with FileInput([t], openhook=old_hook) as fi: + result = fi.readline() + self.assertEqual(result, "I used to receive only filename and mode") + def test_context_manager(self): try: t1 = writeTmp(1, ["A\nB\nC"]) @@ -404,11 +413,12 @@ """A class that mocks out fileinput.FileInput for use during unit tests""" def __init__(self, files=None, inplace=False, backup="", bufsize=0, - mode="r", openhook=None): + mode="r", openhook=None, encoding=None): self.files = files self.inplace = inplace self.backup = backup self.bufsize = bufsize + self.encoding = encoding self.mode = mode self.openhook = openhook self._file = None @@ -514,11 +524,12 @@ bufsize = object() mode = object() openhook = object() + encoding = object() # call fileinput.input() with different values 
for each argument result = fileinput.input(files=files, inplace=inplace, backup=backup, - bufsize=bufsize, - mode=mode, openhook=openhook) + bufsize=bufsize, mode=mode, + openhook=openhook, encoding=encoding) # ensure fileinput._state was set to the returned object self.assertIs(result, fileinput._state, "fileinput._state") @@ -741,12 +752,16 @@ self.assertIs(retval, isstdin_retval) self.assertIs(fileinput._state, instance) -class InvocationRecorder: +class InvocationRecorder(object): + def __init__(self): self.invocation_count = 0 + def __call__(self, *args, **kwargs): self.invocation_count += 1 self.last_invocation = (args, kwargs) + return io.BytesIO(b'some bytes') + class Test_hook_compressed(unittest.TestCase): """Unit tests for fileinput.hook_compressed()""" @@ -772,6 +787,17 @@ self.assertEqual(self.fake_open.invocation_count, 1) self.assertEqual(self.fake_open.last_invocation, (("test.gz", 3), {})) + @unittest.skipUnless(gzip, "Requires gzip and zlib") + def test_gz_with_encoding_fake(self): + original_open = gzip.open + gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string') + try: + result = fileinput.hook_compressed("test.gz", 3, encoding="utf-8") + finally: + gzip.open = original_open + self.assertEqual(list(result), ['Ex-binary string']) + + @unittest.skipUnless(bz2, "Requires bz2") def test_bz2_ext_fake(self): original_open = bz2.BZ2File @@ -802,7 +828,7 @@ self.assertEqual(self.fake_open.invocation_count, 1) self.assertEqual(self.fake_open.last_invocation, - ((filename, mode), {})) + ((filename, mode), {'encoding': None})) @staticmethod def replace_builtin_open(new_open_func): diff -r 3877bf2e3235 Misc/ACKS --- a/Misc/ACKS Mon Mar 12 09:46:44 2012 -0700 +++ b/Misc/ACKS Tue Mar 13 00:35:31 2012 -0700 @@ -21,6 +21,7 @@ Nir Aides Yaniv Aknin Jyrki Alakuijala +Tatiana Al-Chueyr Ray Allen Billy G. Allie Kevin Altis