diff -Naur old/Doc/lib/lib.tex new/Doc/lib/lib.tex --- old/Doc/lib/lib.tex 2006-08-21 10:42:42.000000000 +0200 +++ new/Doc/lib/lib.tex 2006-08-23 10:46:37.000000000 +0200 @@ -274,6 +274,7 @@ \input{libnis} \input{libsyslog} \input{libcommands} +\input{libsplicetee} % ============= diff -Naur old/Doc/lib/libsplicetee.tex new/Doc/lib/libsplicetee.tex --- old/Doc/lib/libsplicetee.tex 1970-01-01 01:00:00.000000000 +0100 +++ new/Doc/lib/libsplicetee.tex 2006-08-22 12:18:52.000000000 +0200 @@ -0,0 +1,80 @@ +\section{\module{splicetee} --- + Interface to the Linux splice()/tee() system calls} + +\declaremodule{extension}{splicetee} + \platform{Linux} +\moduleauthor{Omar AitMous}{Omar.AitMous@gmail.com} +\sectionauthor{Omar AitMous}{Omar.AitMous@gmail.com} + +\modulesynopsis{An interface to the new splice() and tee() system calls under +Linux 2.6.17 and higher.} + + +The \module{splicetee} interfaces the splice() and tee() system calls. +These are Linux 2.6.17 system calls to : + + * move data from a stream to another without need for user-space involvement + and with minimal (or no) copying. + + * transfer data between two pipes without consuming the input. + +The \module{splicetee} module defines the following functions: + +\begin{funcdesc}{splice}{in_fd, off_in, out_fd, off_out, count, flags} +A \function{splice} is a system call mechanism used by the Linux kernel to request +the application program to generate service from the operating system in order +to transfer information in kernel space without the use of user space and +copying. It is seen as a way of improving I/O performance. + +The core idea is that a process could open a file descriptor for a data source, +and another for a data sink. Then, with a call to \function{splice}, those two +streams could be connected to each other and the data could flow from the +source to the sink entirely within the kernel, with no need for user-space +involvement and with minimal (or no) copying. 
+ +\var{in_fd} and \var{out_fd} are the file descriptors while \var{off_in} and +\var{off_out} are the positions. A call to \function{splice} will cause the +kernel to move up to \var{count} bytes from the data source \var{in_fd} to +\var{out_fd}. + +The \var{flags} parameter modifies how the copy is done. +Currently implemented flags are SPLICE_F_NONBLOCK, SPLICE_F_MORE and +SPLICE_F_MOVE. + +\begin{notice} +Currently one cannot call \function{splice} with \var{in_fd} referring to a +socket nor with \var{in_fd} or \var{out_fd} referring to files on a network file +system; the system call currently works only on ext2fs, ext3fs and reiserfs. +Besides, internally \function{splice} works using the pipe buffer mechanism +added by Linus in early 2005 - that is why one side of the operation is +required to be a pipe for now. +\end{notice} +\end{funcdesc} + +\begin{funcdesc}{tee}{in_fd, out_fd, n, flags} +\var{in_fd} and \var{out_fd} are file descriptors. +This call requires that both file descriptors be pipes. It simply connects +\var{in_fd} and \var{out_fd}, transferring up to \var{n} bytes between them. +Unlike \function{splice}, however, \function{tee} does not consume the input, +enabling the input data to be read normally later on by the calling process. +The \var{flags} used are the SPLICE_F_* variants, currently the only applicable one +is SPLICE_F_NONBLOCK. +\end{funcdesc} + +The following data items are available for use in constructing the flags +parameter to the \function{splice} and \function{tee} functions. +\begin{datadesc}{SPLICE_F_MORE} +A hint to the Kernel that more data will come in a subsequent \function{splice} +call. +\end{datadesc} +\begin{datadesc}{SPLICE_F_MOVE} +If the output is a file, this flag will cause the Kernel to attempt to move +pages directly from the input pipe buffer into the output address space, +avoiding a copy operation. 
+\end{datadesc} +\begin{datadesc}{SPLICE_F_NONBLOCK} +Makes the \function{splice} and \function{tee} operations non-blocking. A call +to \function{splice} or \function{tee} could still block, however, especially +if either of the file descriptors has not been set for non-blocking I/O. +\end{datadesc} + diff -Naur old/Lib/test/regrtest.py new/Lib/test/regrtest.py --- old/Lib/test/regrtest.py 2006-08-21 10:44:01.000000000 +0200 +++ new/Lib/test/regrtest.py 2006-08-23 11:06:33.000000000 +0200 @@ -837,6 +837,7 @@ test_pwd test_resource test_signal + test_splicetee test_sunaudiodev test_threadsignals test_timing @@ -900,6 +901,7 @@ test_pwd test_resource test_signal + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -926,6 +928,7 @@ test_openpty test_pyexpat test_sax + test_splicetee test_startfile test_sqlite test_sunaudiodev @@ -950,6 +953,7 @@ test_openpty test_pyexpat test_sax + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -979,6 +983,7 @@ test_pyexpat test_queue test_sax + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -1021,6 +1026,7 @@ test_pty test_pwd test_strop + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -1051,6 +1057,7 @@ test_ntpath test_ossaudiodev test_poll + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -1071,6 +1078,7 @@ test_imgfile test_linuxaudiodev test_openpty + test_splicetee test_sqlite test_startfile test_zipfile @@ -1099,6 +1107,7 @@ test_openpty test_pyexpat test_sax + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -1126,6 +1135,7 @@ test_poll test_popen2 test_resource + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -1149,6 +1159,7 @@ test_nis test_ossaudiodev test_socketserver + test_splicetee test_sqlite test_sunaudiodev """, @@ -1176,6 +1187,7 @@ test_pty test_resource test_signal + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -1204,6 +1216,7 @@ test_scriptpackages test_socket_ssl test_socketserver + test_splicetee 
test_sqlite test_startfile test_sunaudiodev @@ -1235,6 +1248,7 @@ test_macostools test_nis test_ossaudiodev + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -1269,6 +1283,7 @@ test_plistlib test_scriptpackages test_tcl + test_splicetee test_sqlite test_startfile test_sunaudiodev @@ -1299,6 +1314,7 @@ test_nis test_ossaudiodev test_pep277 + test_splicetee test_sqlite test_startfile test_sunaudiodev diff -Naur old/Lib/test/test_splicetee.py new/Lib/test/test_splicetee.py --- old/Lib/test/test_splicetee.py 1970-01-01 01:00:00.000000000 +0100 +++ new/Lib/test/test_splicetee.py 2006-08-21 14:20:48.000000000 +0200 @@ -0,0 +1,44 @@ +import unittest +import splicetee +import os +from test import test_support + +class SpliceTeeModuleTest(unittest.TestCase): + def read_what_was_written(self, fd, buf): # read len(buf) bytes from fd; fail the test unless they equal buf + buf_tmp = "" + while len(buf_tmp) < len(buf): + buf_tmp += os.read(fd, len(buf) - len(buf_tmp)) + if buf_tmp != buf: + self.fail("failed in reading what has been written: got %r, expected %r"%(buf_tmp,buf)) + + def test_read_write(self): # tee() pipe1 into pipe2, then splice() pipe1 into a temp file; both must yield buf + pipe1 = os.pipe() + buf = "splicetee" + written = 0 + while written < len(buf): + count = os.write(pipe1[1],buf[written:]) + written += count + pipe2 = os.pipe() + splicetee.tee(pipe1[0], pipe2[1], len(buf), 0) + self.read_what_was_written(pipe2[0], buf) + + file = os.tmpfile() + splicetee.splice(pipe1[0], 0, file.fileno(), 0, len(buf), 0) + os.lseek(file.fileno(), 0, 0) + self.read_what_was_written(file.fileno(), buf) + + file.close() + os.close(pipe1[0]) + os.close(pipe1[1]) + os.close(pipe2[0]) + os.close(pipe2[1]) + + + +def test_main(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(SpliceTeeModuleTest)) + test_support.run_suite(suite) + +if __name__ == "__main__": + test_main() diff -Naur old/Modules/spliceteemodule.c new/Modules/spliceteemodule.c --- old/Modules/spliceteemodule.c 1970-01-01 01:00:00.000000000 +0100 +++ new/Modules/spliceteemodule.c 2006-08-22 13:40:27.000000000 +0200 @@ -0,0 +1,207 @@ +/* py-splicetee 
file : spliceteemodule.c + A Python module interface to 'splice' and 'tee' system calls. + + This is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <Python.h> + +#if defined(__i386__) +#define __NR_splice 313 +#define __NR_tee 315 +#elif defined(__ia64__) +#define __NR_splice 1297 +#define __NR_tee 1301 +#elif defined(__x86_64__) || defined(__amd64__) +#define __NR_splice 275 +#define __NR_tee 276 +#elif defined(__powerpc__) || defined(__powerpc64__) +#define __NR_splice 283 +#define __NR_tee 284 +#else +#error unsupported arch (supported ones are i386, ia64, x86_64, powerpc) +#endif + +#define SPLICE_F_MOVE (0x01) +#define SPLICE_F_NONBLOCK (0x02) +#define SPLICE_F_MORE (0x04) +#define SPLICE_F_GIFT (0x08) + +static inline ssize_t splice(int fdin, loff_t *off_in, int fdout, + loff_t *off_out, size_t len, unsigned int flags) +{ + return syscall(__NR_splice, fdin, off_in, fdout, off_out, len, flags); +} + +static inline ssize_t tee(int fdin, int fdout, size_t len, unsigned int flags) +{ + return syscall(__NR_tee, fdin, fdout, len, flags); +} + +static PyObject * +method_splice(PyObject *self, PyObject *args) +{ + int in_fd, out_fd; + loff_t off_in, off_out; + loff_t *p_in = NULL, *p_out = NULL; + Py_ssize_t len; /* signed, so a negative count can be rejected below */ + unsigned int flags; + + ssize_t sts; + + if(!PyArg_ParseTuple(args, "iLiLnI", &in_fd, &off_in, &out_fd, 
&off_out, &len + , &flags)) + return NULL; + + if(len < 0) { + PyErr_SetString(PyExc_ValueError, "transfer size must be positive."); + return NULL; + } + + if(off_in) /* an offset of 0 is treated as "no offset": NULL is passed to splice() */ + p_in = &off_in; + if(off_out) /* same convention for the output side */ + p_out = &off_out; + + Py_BEGIN_ALLOW_THREADS; + sts = splice(in_fd, p_in, out_fd, p_out, (size_t)len, flags); + Py_END_ALLOW_THREADS; + if(sts == -1) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } else { + return Py_BuildValue("LLn", (PY_LONG_LONG)off_in, (PY_LONG_LONG)off_out, (Py_ssize_t)sts); + } +} + +static PyObject * +method_tee(PyObject *self, PyObject *args) +{ + int in_fd, out_fd; + Py_ssize_t len; /* signed, so a negative count can be rejected below */ + unsigned int flags; + + ssize_t sts; + + if(!PyArg_ParseTuple(args, "iinI", &in_fd, &out_fd, &len, &flags)) + return NULL; + + if(len < 0) { + PyErr_SetString(PyExc_ValueError, "transfer size must be positive."); + return NULL; + } + + Py_BEGIN_ALLOW_THREADS; + sts = tee(in_fd, out_fd, (size_t)len, flags); + Py_END_ALLOW_THREADS; + + if(sts == -1) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } else { + return Py_BuildValue("n", (Py_ssize_t)sts); + } +} + + + +static PyMethodDef SpliceTeeMethods[] = { + {"splice", method_splice, METH_VARARGS, +"splice(in_fd, off_in, out_fd, off_out, count, flags) = [position_in, \ +position_out, sent]\n" +"\n" +"A splice() is a system call mechanism used by the Linux kernel to request \ +the application program to generate service from the operating system in \ +order to transfer information in kernel space without the use of user space \ +and copying. It is seen as a way of improving I/O performance.\n" +"The core idea is that a process could open a file descriptor for a data \ +source, and another for a data sink. Then, with a call to splice(), those two \ +streams could be connected to each other, and the data could flow from the \ +source to the sink entirely within the kernel, with no need for user-space \ +involvement and with minimal (or no) copying.\n" +"\n" +"'in_fd' and 'out_fd' are the file descriptors while 'off_in' and 'off_out' \ +are the positions. 
A call to splice() will cause the kernel to move up to \ +'count' bytes from the data source in_fd to out_fd.\n" +"The 'flags' argument modifies how the copy is done. Currently implemented \ +flags are :\n" +" SPLICE_F_NONBLOCK : makes the splice() operations non-blocking. A call to \ +splice() could still block, however, especially if either of the file \ +descriptors has not been set for non-blocking I/O.\n" +" SPLICE_F_MORE : a hint to the kernel that more data will come in a \ +subsequent splice() call.\n" +" SPLICE_F_MOVE : if the output is a file, this flag will cause the kernel \ +to attempt to move pages directly from the input pipe buffer into the output \ +address space, avoiding a copy operation.\n" +"\n" +"Actually one cannot call splice with in_fd referring to a socket nor with \ +in_fd or out_fd referring to files on a network file system (since splice \ +operations are undefined on those contexts).\n" +"Internally, splice() works using the pipe buffer mechanism added by Linus in \ +early 2005 - that is why one side of the operation is required to be a pipe \ +for now.\n" +}, + {"tee", method_tee, METH_VARARGS, +"tee(in_fd, out_fd, count, flags) = [sent]\n" +"\n" +"'in_fd' and 'out_fd' are the file descriptors. This call requires that both \ +file descriptors be pipes. It simply connects in_fd to out_fd, transferring up \ +to count bytes between them. Unlike splice(), however, tee() does not consume \ +the input, enabling the input data to be read normally later on by the \ +calling process.\n" +"The 'flags' used are the SPLICE_F_* variants, currently the only applicable \ +one is SPLICE_F_NONBLOCK. 
\n" +"\n" +"Now, the advantage of splice()/tee() is that you can do zero-copy movement \ +of data, and unlike sendfile() you can do it on _arbitrary_ data (and, as \ +shown by 'tee()', it's more than just sending the data to somebody else: you \ +can duplicate the data and choose to forward it to two or more different \ +users - for things like logging etc).\n" +"So while sendfile() can send files (surprise surprise), splice() really is \ +a general 'read/write in user space' and then some, so you can forward data \ +from one socket to another, without ever copying it into user space.\n" +"Or, rather than just a boring socket->socket forwarding, you could, for \ +example, forward data that comes from a MPEG-4 hardware encoder, and tee() it \ +to duplicate the stream, and write one of the streams to disk, and the other \ +one to a socket for a real-time broadcast. Again, all without actually \ +physically copying it around in memory.\n" +}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static void +insint (PyObject *d, char *name, int value) /* best effort: store name=value in dict d, discarding any error */ +{ + PyObject *v = PyInt_FromLong((long) value); + if (!v || PyDict_SetItemString(d, name, v)) + PyErr_Clear(); + + Py_XDECREF(v); +} + +PyMODINIT_FUNC +initsplicetee(void) +{ + PyObject *m = Py_InitModule("splicetee", SpliceTeeMethods); + PyObject *d; + if (m == NULL) return; /* module creation failed; nothing to populate */ + d = PyModule_GetDict (m); + insint (d, "SPLICE_F_MOVE", SPLICE_F_MOVE); + insint (d, "SPLICE_F_NONBLOCK", SPLICE_F_NONBLOCK); + insint (d, "SPLICE_F_MORE", SPLICE_F_MORE); + PyModule_AddStringConstant(m, "__doc__", "Direct interface to Linux " + "2.6.17+ splice and tee system calls."); + PyModule_AddStringConstant(m, "__version__", "1.0"); +} diff -Naur old/setup.py new/setup.py --- old/setup.py 2006-08-21 10:45:00.000000000 +0200 +++ new/setup.py 2006-08-23 11:07:03.000000000 +0200 @@ -899,6 +899,9 @@ libs = [] exts.append( Extension('nis', ['nismodule.c'], libraries = libs) ) + # Omar AitMous's splice/tee interface + if platform == 'linux2': + exts.append( Extension('splicetee', 
['spliceteemodule.c']) ) # Curses support, requiring the System V version of curses, often # provided by the ncurses library.