diff -r c7f1acdd8be1 Doc/library/fileinput.rst --- a/Doc/library/fileinput.rst Mon Feb 01 12:47:15 2016 +0100 +++ b/Doc/library/fileinput.rst Wed Apr 27 12:07:21 2016 -0400 @@ -188,10 +188,16 @@ Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)`` -.. function:: hook_encoded(encoding) +.. function:: hook_encoded(encoding, errors=None) Returns a hook which opens each file with :func:`codecs.open`, using the given - *encoding* to read the file. + *encoding* to read the file. *errors* is an optional string that specifies how + encoding and decoding errors are to be handled--this cannot be used in binary + mode. A variety of standard error handlers are available (listed under + :ref:`error-handlers`). Usage example: ``fi = - fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))`` + fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1", errors="replace"))`` + + .. versionchanged:: 3.6 + Added the *errors* keyword argument. diff -r c7f1acdd8be1 Doc/whatsnew/3.6.rst --- a/Doc/whatsnew/3.6.rst Mon Feb 01 12:47:15 2016 +0100 +++ b/Doc/whatsnew/3.6.rst Wed Apr 27 12:07:21 2016 -0400 @@ -140,6 +140,12 @@ (Contributed by Nikolay Bogoychev in :issue:`16099`.) +fileinput +--------- + +:func:`~fileinput.hook_encoded` now supports the *errors* keyword argument. 
+ + Optimizations ============= diff -r c7f1acdd8be1 Lib/fileinput.py --- a/Lib/fileinput.py Mon Feb 01 12:47:15 2016 +0100 +++ b/Lib/fileinput.py Wed Apr 27 12:07:21 2016 -0400 @@ -412,9 +412,9 @@ return open(filename, mode) -def hook_encoded(encoding): +def hook_encoded(encoding, errors=None): def openhook(filename, mode): - return open(filename, mode, encoding=encoding) + return open(filename, mode, encoding=encoding, errors=errors) return openhook diff -r c7f1acdd8be1 Lib/test/test_fileinput.py --- a/Lib/test/test_fileinput.py Mon Feb 01 12:47:15 2016 +0100 +++ b/Lib/test/test_fileinput.py Wed Apr 27 12:07:21 2016 -0400 @@ -873,7 +873,8 @@ def test(self): encoding = object() - result = fileinput.hook_encoded(encoding) + errors = object() + result = fileinput.hook_encoded(encoding, errors=errors) fake_open = InvocationRecorder() original_open = builtins.open @@ -891,8 +892,26 @@ self.assertIs(args[0], filename) self.assertIs(args[1], mode) self.assertIs(kwargs.pop('encoding'), encoding) + self.assertIs(kwargs.pop('errors'), errors) self.assertFalse(kwargs) + def test_errors(self): + with open(TESTFN, 'wb') as f: + f.write(b'\x80abc') + self.addCleanup(safe_unlink, TESTFN) + + def check(errors, expected_lines): + with FileInput(files=TESTFN, mode='r', + openhook=hook_encoded('utf-8', errors=errors)) as fi: + lines = list(fi) + self.assertEqual(lines, expected_lines) + + check('ignore', ['abc']) + with self.assertRaises(UnicodeDecodeError): + check('strict', ['abc']) + check('replace', ['\ufffdabc']) + check('backslashreplace', ['\\x80abc']) + def test_modes(self): with open(TESTFN, 'wb') as f: # UTF-7 is a convenient, seldom used encoding diff -r c7f1acdd8be1 Misc/ACKS --- a/Misc/ACKS Mon Feb 01 12:47:15 2016 +0100 +++ b/Misc/ACKS Wed Apr 27 12:07:21 2016 -0400 @@ -534,6 +534,7 @@ Lars Gustäbel Thomas Güttler Jonas H. 
+Joseph Hackman Barry Haddow Philipp Hagemeister Paul ten Hagen diff -r c7f1acdd8be1 Misc/NEWS --- a/Misc/NEWS Mon Feb 01 12:47:15 2016 +0100 +++ b/Misc/NEWS Wed Apr 27 12:07:21 2016 -0400 @@ -166,6 +166,9 @@ Library ------- +- Issue #25788: fileinput.hook_encoded() now supports an errors argument + that is passed through to open(). + - Issue #26202: copy.deepcopy() now correctly copies range() objects with non-atomic attributes.