Add a C helper, _fileio.count_lineendings(s), that returns the tuple
(crlf, cr, lf) of line-ending counts for a str in a single pass.  Lib/io.py
uses it in place of three str.count() calls in both newline-recording
sites, and Lib/test/test_fileio.py gains a unit test for it.

NOTE(review): this patch was recovered from a copy whose line breaks had
been collapsed.  The indentation of context lines is reconstructed; if a
hunk fails to apply, retry with `patch -l` (ignore whitespace).

Index: Lib/io.py
===================================================================
--- Lib/io.py	(Revision 67608)
+++ Lib/io.py	(Arbeitskopie)
@@ -58,7 +58,7 @@
 import os
 import abc
 import codecs
-import _fileio
+from _fileio import _FileIO, count_lineendings
 # Import _thread instead of threading to reduce startup cost
 try:
     from _thread import allocate_lock as Lock
@@ -602,7 +602,7 @@
         self._unsupported("write")
 
 
-class FileIO(_fileio._FileIO, RawIOBase):
+class FileIO(_FileIO, RawIOBase):
 
     """Raw I/O implementation for OS files."""
 
@@ -612,11 +612,11 @@
     # to do since _fileio.c is written in C).
 
     def __init__(self, name, mode="r", closefd=True):
-        _fileio._FileIO.__init__(self, name, mode, closefd)
+        _FileIO.__init__(self, name, mode, closefd)
         self._name = name
 
     def close(self):
-        _fileio._FileIO.close(self)
+        _FileIO.close(self)
         RawIOBase.close(self)
 
     @property
@@ -1303,9 +1303,7 @@
         self.buffer = b''
 
         # Record which newlines are read
-        crlf = output.count('\r\n')
-        cr = output.count('\r') - crlf
-        lf = output.count('\n') - crlf
+        crlf, cr, lf = count_lineendings(output)
         self.seennl |= (lf and self._LF) | (cr and self._CR) \
                     | (crlf and self._CRLF)
 
@@ -1932,9 +1930,7 @@
         self._pending = ""
 
         # Record which newlines are read
-        crlf = input.count('\r\n')
-        cr = input.count('\r') - crlf
-        lf = input.count('\n') - crlf
+        crlf, cr, lf = count_lineendings(input)
         self._seennl |= (lf and self._LF) | (cr and self._CR) \
                      | (crlf and self._CRLF)
 
Index: Lib/test/test_fileio.py
===================================================================
--- Lib/test/test_fileio.py	(Revision 67608)
+++ Lib/test/test_fileio.py	(Arbeitskopie)
@@ -248,7 +248,30 @@
             self.assertRaises(ValueError, _fileio._FileIO, "/some/invalid/name", "rt")
             self.assertEqual(w.warnings, [])
 
+    def test_count_lineendings(self):
+        """count_lineendings() returns (crlf, cr, lf) and rejects non-str."""
+        def assertCle(s, crlf, cr, lf):
+            got = _fileio.count_lineendings(s)
+            expected = (crlf, cr, lf)
+            self.assertEqual(got, expected,
+                "%r != %r for %r" % (got, expected, s))
+        assertCle("", 0, 0, 0)
+        assertCle("\r", 0, 1, 0)
+        assertCle("\n", 0, 0, 1)
+        assertCle("\r\n", 1, 0, 0)
+        assertCle("\r\r\n", 1, 1, 0)
+        assertCle("\r\n\n", 1, 0, 1)
+        assertCle("\n\r\n", 1, 0, 1)
+        assertCle("\r\n\r\n", 2, 0, 0)
+        assertCle("\n\r\n\r", 1, 1, 1)
+
+        self.assertRaises(TypeError, _fileio.count_lineendings, b'')
+        self.assertRaises(TypeError, _fileio.count_lineendings, 1)
+        self.assertRaises(TypeError, _fileio.count_lineendings, object)
+        self.assertRaises(TypeError, _fileio.count_lineendings, object())
+
+
 
 def test_main():
     # Historically, these tests have been sloppy about removing TESTFN.
     # So get rid of it no matter what.
Index: Modules/_fileio.c
===================================================================
--- Modules/_fileio.c	(Revision 67608)
+++ Modules/_fileio.c	(Arbeitskopie)
@@ -904,7 +904,79 @@
 	PyObject_Del,                           /* tp_free */
 };
 
+
+/* Count the line endings in the str `arg`.
+ * Returns a new (crlf, cr, lf) tuple; a '\r' directly followed by '\n' is
+ * counted once as crlf and as neither cr nor lf.  Returns NULL with an
+ * exception set if PyUnicode_FromObject() cannot convert arg.
+ */
+static PyObject *
+fileio_count_lineendings(PyObject *self, PyObject *arg)
+{
+	PyObject *result = NULL;
+	PyUnicodeObject *ustr;
+	Py_ssize_t crlf = 0, lf = 0, cr = 0;
+	Py_UNICODE *uc, *ulast;
+
+	ustr = (PyUnicodeObject *)PyUnicode_FromObject(arg);
+	if (ustr == NULL) {
+		goto end;
+	}
+	if (ustr->length == 0) {
+		goto success;
+	}
+
+	/* ulast is the final character; the loop stops before it so that
+	 * the uc+1 look-ahead never reads past the string data. */
+	ulast = &ustr->str[ustr->length-1];
+
+	#define UNICODE_CR 0x000D
+	#define UNICODE_LF 0x000A
+	for (uc = ustr->str; uc < ulast; uc++) {
+		switch(*uc) {
+		case(UNICODE_CR):
+			if (*(uc+1) == UNICODE_LF) {
+				crlf++;
+				uc++;
+			} else {
+				cr++;
+			}
+			break;
+		case(UNICODE_LF):
+			lf++;
+			break;
+		}
+	}
+	/* The final character is still pending unless the loop already
+	 * consumed it as the LF of a CRLF pair (then uc == ulast + 1). */
+	if (uc == ulast) {
+		switch(*uc) {
+		case(UNICODE_CR):
+			cr++;
+			break;
+		case(UNICODE_LF):
+			lf++;
+			break;
+		}
+	}
+	#undef UNICODE_CR
+	#undef UNICODE_LF
+  success:
+	result = Py_BuildValue("nnn", crlf, cr, lf);
+  end:
+	Py_XDECREF(ustr);
+	return result;
+}
+
+
+PyDoc_STRVAR(fileio_count_lineendings__doc__,
+"count_lineendings(s: str) -> (crlf, cr, lf)\n\n"
+"Return count of '\\r\\n', '\\r' and '\\n' inside s. '\\r\\n' counts \n"
+"only to crlf."
+);
+
 static PyMethodDef module_methods[] = {
+	{"count_lineendings", fileio_count_lineendings, METH_O, fileio_count_lineendings__doc__},
 	{NULL, NULL}
 };
 