Issue #20501: fileinput.hook_encoded() now opens files with io.open()
instead of codecs.open(), so that FileInput.readline() no longer reads the
whole file into memory.  Tests and a Misc/NEWS entry are included.

diff -r 0926adcc335c Lib/fileinput.py
--- a/Lib/fileinput.py	Mon Feb 24 15:36:45 2014 -0500
+++ b/Lib/fileinput.py	Wed Feb 26 19:12:18 2014 +0200
@@ -387,9 +387,13 @@
 
 
 def hook_encoded(encoding):
-    import codecs
+    import io
     def openhook(filename, mode):
-        return codecs.open(filename, mode, encoding)
+        # Strip 'U' and 'b' so the file is opened in text mode; an empty
+        # result (mode 'U') falls back to 'r'.  newline='' leaves line
+        # endings untranslated.
+        mode = mode.replace('U', '').replace('b', '') or 'r'
+        return io.open(filename, mode, encoding=encoding, newline='')
     return openhook
 
 
diff -r 0926adcc335c Lib/test/test_fileinput.py
--- a/Lib/test/test_fileinput.py	Mon Feb 24 15:36:45 2014 -0500
+++ b/Lib/test/test_fileinput.py	Wed Feb 26 19:12:18 2014 +0200
@@ -218,8 +218,50 @@
         finally:
             remove_tempfiles(t1)
 
+    def test_readline(self):
+        with open(TESTFN, 'wb') as f:
+            f.write('A\nB\r\nC\r')
+            # Fill TextIOWrapper buffer.
+            f.write('123456789\n' * 1000)
+            # Issue #20501: readline() shouldn't read whole file.
+            f.write('\x80')
+        self.addCleanup(safe_unlink, TESTFN)
+
+        fi = FileInput(files=TESTFN, openhook=hook_encoded('ascii'), bufsize=8)
+        # Cleanups run LIFO, so fi is closed before TESTFN is unlinked.
+        self.addCleanup(fi.close)
+        self.assertEqual(fi.readline(), u'A\n')
+        self.assertEqual(fi.readline(), u'B\r\n')
+        self.assertEqual(fi.readline(), u'C\r')
+        with self.assertRaises(UnicodeDecodeError):
+            # Read to the end of file.
+            list(fi)
+
+class Test_hook_encoded(unittest.TestCase):
+    """Unit tests for fileinput.hook_encoded()"""
+
+    def test_modes(self):
+        # Unlikely UTF-7 is locale encoding
+        with open(TESTFN, 'wb') as f:
+            f.write('A\nB\r\nC\rD+IKw-')
+        self.addCleanup(safe_unlink, TESTFN)
+
+        def check(mode, expected_lines):
+            fi = FileInput(files=TESTFN, mode=mode,
+                           openhook=hook_encoded('utf-7'))
+            try:
+                lines = list(fi)
+            finally:
+                fi.close()
+            self.assertEqual(lines, expected_lines)
+
+        check('r', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
+        check('rU', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
+        check('U', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
+        check('rb', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
+
 def test_main():
-    run_unittest(BufferSizesTests, FileInputTests)
+    run_unittest(BufferSizesTests, FileInputTests, Test_hook_encoded)
 
 if __name__ == "__main__":
     test_main()
diff -r 0926adcc335c Misc/NEWS
--- a/Misc/NEWS	Mon Feb 24 15:36:45 2014 -0500
+++ b/Misc/NEWS	Wed Feb 26 19:12:18 2014 +0200
@@ -40,6 +40,9 @@
 Library
 -------
 
+- Issue #20501: fileinput module no longer reads whole file into memory when
+  using fileinput.hook_encoded.
+
 - Issue #6815: os.path.expandvars() now supports non-ASCII Unicode environment
   variables names and values.
 