sleepy_penguin RubyGem user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
* [sleepy_penguin PATCH 0/2] splice/tee/copy_file_range support
@ 2016-03-16  3:13 Eric Wong
  2016-03-16  3:13 ` [PATCH 1/2] support the splice(2) and tee(2) syscalls Eric Wong
  2016-03-16  3:13 ` [PATCH 2/2] implement copy_file_range support for Linux 4.5+ Eric Wong
  0 siblings, 2 replies; 4+ messages in thread
From: Eric Wong @ 2016-03-16  3:13 UTC (permalink / raw)
  To: sleepy-penguin; +Cc: ruby-io-splice

Linux 4.5 was just released the other day with
copy_file_range(2) support.  This is similar to splice(2), but
meant to operate on regular files rather than pipes.

copy_file_range(2) does not have glibc support, yet,
so it's only supported on x86-64 and x86, for now.

Blatantly stealing code from the "io_splice" gem, but omitting
vmsplice support for now since I never figured out a good way to
use it in Ruby without a mmap(2) wrapper...

So with that, I'm planning to slowly phase out the "io_splice"
gem since this one absorbs its functionality.  And I will add
vmsplice support here if we end up supporting mmap, somehow.

I also plan to support sendfile as that's another related
syscall to these, but may also support a "bsd_sendfile"
with support for writing header/trailer vectors...

Eric Wong (2):
      support the splice(2) and tee(2) syscalls
      implement copy_file_range support for Linux 4.5+

 .document                     |   1 +
 ext/sleepy_penguin/cfr.c      |  68 +++++++++
 ext/sleepy_penguin/extconf.rb |   5 +
 ext/sleepy_penguin/init.c     |   5 +
 ext/sleepy_penguin/sp_copy.h  |  50 +++++++
 ext/sleepy_penguin/splice.c   | 328 ++++++++++++++++++++++++++++++++++++++++++
 ext/sleepy_penguin/util.c     |   3 +
 test/test_cfr.rb              |  29 ++++
 test/test_splice.rb           | 254 ++++++++++++++++++++++++++++++++
 test/test_splice_eintr.rb     |  34 +++++
 10 files changed, 777 insertions(+)


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] support the splice(2) and tee(2) syscalls
  2016-03-16  3:13 [sleepy_penguin PATCH 0/2] splice/tee/copy_file_range support Eric Wong
@ 2016-03-16  3:13 ` Eric Wong
  2017-01-02  2:31   ` Eric Wong
  2016-03-16  3:13 ` [PATCH 2/2] implement copy_file_range support for Linux 4.5+ Eric Wong
  1 sibling, 1 reply; 4+ messages in thread
From: Eric Wong @ 2016-03-16  3:13 UTC (permalink / raw)
  To: sleepy-penguin; +Cc: ruby-io-splice, Eric Wong

Since these are Linux-specific syscalls, it makes sense to
include them here.  This is taken from the "io_splice" RubyGem,
but this may supersede that.

Note: this does not include a vmsplice(2) wrapper
---
 .document                     |   1 +
 ext/sleepy_penguin/extconf.rb |   4 +
 ext/sleepy_penguin/init.c     |   3 +
 ext/sleepy_penguin/splice.c   | 376 ++++++++++++++++++++++++++++++++++++++++++
 ext/sleepy_penguin/util.c     |   3 +
 test/test_splice.rb           | 254 ++++++++++++++++++++++++++++
 test/test_splice_eintr.rb     |  34 ++++
 7 files changed, 675 insertions(+)
 create mode 100644 ext/sleepy_penguin/splice.c
 create mode 100644 test/test_splice.rb
 create mode 100644 test/test_splice_eintr.rb

diff --git a/.document b/.document
index 4e5ae53..5fe6bb1 100644
--- a/.document
+++ b/.document
@@ -9,3 +9,4 @@ ext/sleepy_penguin/init.c
 ext/sleepy_penguin/inotify.c
 ext/sleepy_penguin/timerfd.c
 ext/sleepy_penguin/kqueue.c
+ext/sleepy_penguin/splice.c
diff --git a/ext/sleepy_penguin/extconf.rb b/ext/sleepy_penguin/extconf.rb
index eda7fcd..46d1059 100644
--- a/ext/sleepy_penguin/extconf.rb
+++ b/ext/sleepy_penguin/extconf.rb
@@ -21,6 +21,10 @@
 have_func('clock_gettime', 'time.h')
 have_func('epoll_create1', %w(sys/epoll.h))
 have_func('inotify_init1', %w(sys/inotify.h))
+have_func('splice', %w(fcntl.h))
+have_func('tee', %w(fcntl.h))
+have_macro('F_GETPIPE_SZ', %w(fcntl.h))
+have_macro('F_SETPIPE_SZ', %w(fcntl.h))
 have_func('rb_thread_call_without_gvl')
 have_func('rb_thread_blocking_region')
 have_func('rb_thread_io_blocking_region')
diff --git a/ext/sleepy_penguin/init.c b/ext/sleepy_penguin/init.c
index 776d6e0..93e8092 100644
--- a/ext/sleepy_penguin/init.c
+++ b/ext/sleepy_penguin/init.c
@@ -52,6 +52,8 @@ void sleepy_penguin_init_signalfd(void);
 #  define sleepy_penguin_init_signalfd() for(;0;)
 #endif
 
+void sleepy_penguin_init_splice(void);
+
 static size_t l1_cache_line_size_detect(void)
 {
 #ifdef _SC_LEVEL1_DCACHE_LINESIZE
@@ -127,4 +129,5 @@ void Init_sleepy_penguin_ext(void)
 	sleepy_penguin_init_eventfd();
 	sleepy_penguin_init_inotify();
 	sleepy_penguin_init_signalfd();
+	sleepy_penguin_init_splice();
 }
diff --git a/ext/sleepy_penguin/splice.c b/ext/sleepy_penguin/splice.c
new file mode 100644
index 0000000..d2f9206
--- /dev/null
+++ b/ext/sleepy_penguin/splice.c
@@ -0,0 +1,376 @@
+#include "sleepy_penguin.h"
+#ifdef HAVE_SPLICE
+#include <errno.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <sys/uio.h>
+#include <limits.h>
+#include <unistd.h>
+
+static VALUE sym_EAGAIN;
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#  define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_GETPIPE_SZ
+#  define F_SETPIPE_SZ    (F_LINUX_SPECIFIC_BASE + 7)
+#  define F_GETPIPE_SZ    (F_LINUX_SPECIFIC_BASE + 8)
+#endif
+
+#ifndef SSIZET2NUM
+#  define SSIZET2NUM(x) LONG2NUM(x)
+#endif
+#ifndef NUM2SIZET
+#  define NUM2SIZET(x) NUM2ULONG(x)
+#endif
+
+static int check_fileno(VALUE io)
+{
+	int saved_errno = errno;
+	int fd = rb_sp_fileno(io);
+	errno = saved_errno;
+	return fd;
+}
+
+#if defined(HAVE_RB_THREAD_CALL_WITHOUT_GVL) && defined(HAVE_RUBY_THREAD_H)
+/* Ruby 2.0+ */
+#  include <ruby/thread.h>
+#  define WITHOUT_GVL(fn,a,ubf,b) \
+        rb_thread_call_without_gvl((fn),(a),(ubf),(b))
+#elif defined(HAVE_RB_THREAD_BLOCKING_REGION)
+typedef VALUE (*my_blocking_fn_t)(void*);
+#  define WITHOUT_GVL(fn,a,ubf,b) \
+	rb_thread_blocking_region((my_blocking_fn_t)(fn),(a),(ubf),(b))
+
+#else /* Ruby 1.8 */
+/* partial emulation of the 1.9 rb_thread_blocking_region under 1.8 */
+#  include <rubysig.h>
+#  define RUBY_UBF_IO ((rb_unblock_function_t *)-1)
+typedef void rb_unblock_function_t(void *);
+typedef void * rb_blocking_function_t(void *);
+static void * WITHOUT_GVL(rb_blocking_function_t *func, void *data1,
+			rb_unblock_function_t *ubf, void *data2)
+{
+	void *rv;
+
+	assert(RUBY_UBF_IO == ubf && "RUBY_UBF_IO required for emulation");
+
+	TRAP_BEG;
+	rv = func(data1);
+	TRAP_END;
+
+	return rv;
+}
+#endif /* ! HAVE_RB_THREAD_BLOCKING_REGION */
+
+#define IO_RUN(fn,data) WITHOUT_GVL((fn),(data),RUBY_UBF_IO,0)
+
+struct splice_args {
+	int fd_in;
+	int fd_out;
+	off_t *off_in;
+	off_t *off_out;
+	size_t len;
+	unsigned flags;
+};
+
+static void *nogvl_splice(void *ptr)
+{
+	struct splice_args *a = ptr;
+
+	return (void *)splice(a->fd_in, a->off_in, a->fd_out, a->off_out,
+	                     a->len, a->flags);
+}
+
+static ssize_t do_splice(int argc, VALUE *argv, unsigned dflags)
+{
+	off_t i = 0, o = 0;
+	VALUE io_in, off_in, io_out, off_out, len, flags;
+	struct splice_args a;
+	ssize_t bytes;
+	ssize_t total = 0;
+
+	rb_scan_args(argc, argv, "51",
+	             &io_in, &off_in, &io_out, &off_out, &len, &flags);
+
+	a.off_in = NIL_P(off_in) ? NULL : (i = NUM2OFFT(off_in), &i);
+	a.off_out = NIL_P(off_out) ? NULL : (o = NUM2OFFT(off_out), &o);
+	a.len = NUM2SIZET(len);
+	a.flags = NIL_P(flags) ? dflags : NUM2UINT(flags) | dflags;
+
+	for (;;) {
+		a.fd_in = check_fileno(io_in);
+		a.fd_out = check_fileno(io_out);
+		bytes = (ssize_t)IO_RUN(nogvl_splice, &a);
+		if (bytes < 0) {
+			if (errno == EINTR)
+				continue;
+			if (total > 0)
+				return total;
+			return bytes;
+		} else if (bytes == 0) {
+			break;
+		} else {
+			return bytes;
+		}
+	}
+
+	return total;
+}
+
+/*
+ * call-seq:
+ *    SleepyPenguin.splice(io_in, off_in, io_out, off_out, len) => integer
+ *    SleepyPenguin.splice(io_in, off_in, io_out, off_out, len, flags) => integer
+ *
+ * Splice +len+ bytes from/to a pipe.  Either +io_in+ or +io_out+
+ * MUST be a pipe.  +io_in+ and +io_out+ may BOTH be pipes as of
+ * Linux 2.6.31 or later.
+ *
+ * +off_in+ and +off_out+ if non-nil may be used to
+ * specify an offset for the non-pipe file descriptor.
+ *
+ * +flags+ defaults to zero if unspecified.
+ * +flags+ may be a bitmask of the following flags:
+ *
+ * * SleepyPenguin::F_MOVE
+ * * SleepyPenguin::F_NONBLOCK
+ * * SleepyPenguin::F_MORE
+ *
+ * Returns the number of bytes spliced.
+ * Raises EOFError when +io_in+ has reached end of file.
+ * Raises Errno::EAGAIN if the SleepyPenguin::F_NONBLOCK flag is set
+ * and the pipe has no data to read from or space to write to.  May
+ * also raise Errno::EAGAIN if the non-pipe descriptor has no data
+ * to read from or space to write to.
+ *
+ * As splice never exposes buffers to userspace, it will not take
+ * into account userspace buffering done by Ruby or stdio.  It is
+ * also not subject to encoding/decoding filters under Ruby 1.9.
+ *
+ * Consider using SleepyPenguin.trysplice if +io_out+ is a pipe or if you are using
+ * non-blocking I/O on both descriptors as it avoids the cost of raising
+ * common Errno::EAGAIN exceptions.
+ *
+ * See manpage for full documentation:
+ * http://kernel.org/doc/man-pages/online/pages/man2/splice.2.html
+ */
+static VALUE my_splice(int argc, VALUE *argv, VALUE self)
+{
+	ssize_t n = do_splice(argc, argv, 0);
+
+	if (n == 0)
+		rb_eof_error();
+	if (n < 0)
+		rb_sys_fail("splice");
+	return SSIZET2NUM(n);
+}
+
+/*
+ * call-seq:
+ *    SleepyPenguin.trysplice(io_in, off_in, io_out, off_out, len) => integer
+ *    SleepyPenguin.trysplice(io_in, off_in, io_out, off_out, len, flags) => integer
+ *
+ * Exactly like SleepyPenguin.splice, except +:EAGAIN+ is returned when either
+ * the read or write end would block instead of raising Errno::EAGAIN.
+ *
+ * SleepyPenguin::F_NONBLOCK is always passed for the pipe descriptor,
+ * but this can still block if the non-pipe descriptor is blocking.
+ *
+ * See SleepyPenguin.splice documentation for more details.
+ *
+ * This method is recommended whenever +io_out+ is a pipe.
+ */
+static VALUE trysplice(int argc, VALUE *argv, VALUE self)
+{
+	ssize_t n = do_splice(argc, argv, SPLICE_F_NONBLOCK);
+
+	if (n == 0)
+		return Qnil;
+	if (n < 0) {
+		if (errno == EAGAIN)
+			return sym_EAGAIN;
+		rb_sys_fail("splice");
+	}
+	return SSIZET2NUM(n);
+}
+
+struct tee_args {
+	int fd_in;
+	int fd_out;
+	size_t len;
+	unsigned flags;
+};
+
+/* runs without GVL */
+static void *nogvl_tee(void *ptr)
+{
+	struct tee_args *a = ptr;
+
+	return (void *)tee(a->fd_in, a->fd_out, a->len, a->flags);
+}
+
+static ssize_t do_tee(int argc, VALUE *argv, unsigned dflags)
+{
+	VALUE io_in, io_out, len, flags;
+	struct tee_args a;
+	ssize_t bytes;
+	ssize_t total = 0;
+
+	rb_scan_args(argc, argv, "31", &io_in, &io_out, &len, &flags);
+	a.len = (size_t)NUM2SIZET(len);
+	a.flags = NIL_P(flags) ? dflags : NUM2UINT(flags) | dflags;
+
+	for (;;) {
+		a.fd_in = check_fileno(io_in);
+		a.fd_out = check_fileno(io_out);
+		bytes = (ssize_t)IO_RUN(nogvl_tee, &a);
+		if (bytes < 0) {
+			if (errno == EINTR)
+				continue;
+			if (total > 0)
+				return total;
+			return bytes;
+		} else if (bytes == 0) {
+			break;
+		} else {
+			return bytes;
+		}
+	}
+
+	return total;
+}
+
+/*
+ * call-seq:
+ *   SleepyPenguin.tee(io_in, io_out, len) => integer
+ *   SleepyPenguin.tee(io_in, io_out, len, flags) => integer
+ *
+ * Copies up to +len+ bytes of data from +io_in+ to +io_out+.  +io_in+
+ * and +io_out+ must both refer to pipe descriptors.  +io_in+ and +io_out+
+ * may not be endpoints of the same pipe.
+ *
+ * +flags+ may be zero (the default) or a combination of:
+ * * SleepyPenguin::F_NONBLOCK
+ *
+ * Other splice-related flags are currently unimplemented or have no effect.
+ *
+ * Returns the number of bytes duplicated if successful.
+ * Raises EOFError when +io_in+ is closed and emptied.
+ * Raises Errno::EAGAIN when +io_in+ is empty and/or +io_out+ is full
+ * and +flags+ contains SleepyPenguin::F_NONBLOCK
+ *
+ * Consider using SleepyPenguin.trytee if you are using
+ * SleepyPenguin::F_NONBLOCK as it avoids the cost of raising
+ * common Errno::EAGAIN exceptions.
+ *
+ * See manpage for full documentation:
+ * http://kernel.org/doc/man-pages/online/pages/man2/tee.2.html
+ */
+static VALUE my_tee(int argc, VALUE *argv, VALUE self)
+{
+	ssize_t n = do_tee(argc, argv, 0);
+
+	if (n == 0)
+		rb_eof_error();
+	if (n < 0)
+		rb_sys_fail("tee");
+
+	return SSIZET2NUM(n);
+}
+
+/*
+ * call-seq:
+ *    SleepyPenguin.trytee(io_in, io_out, len) => integer
+ *    SleepyPenguin.trytee(io_in, io_out, len, flags) => integer
+ *
+ * Exactly like SleepyPenguin.tee, except +:EAGAIN+ is returned when either
+ * the read or write end would block instead of raising Errno::EAGAIN.
+ *
+ * SleepyPenguin::F_NONBLOCK is always passed.  Unlike trysplice, no
+ * non-pipe descriptor is involved, as both ends must be pipes.
+ *
+ * See SleepyPenguin.tee documentation for more details.
+ */
+static VALUE trytee(int argc, VALUE *argv, VALUE self)
+{
+	ssize_t n = do_tee(argc, argv, SPLICE_F_NONBLOCK);
+
+	if (n == 0)
+		return Qnil;
+	if (n < 0) {
+		if (errno == EAGAIN)
+			return sym_EAGAIN;
+		rb_sys_fail("tee");
+	}
+
+	return SSIZET2NUM(n);
+}
+
+void sleepy_penguin_init_splice(void)
+{
+	VALUE mod = rb_define_module("SleepyPenguin");
+	rb_define_singleton_method(mod, "splice", my_splice, -1);
+	rb_define_singleton_method(mod, "trysplice", trysplice, -1);
+	rb_define_singleton_method(mod, "tee", my_tee, -1);
+	rb_define_singleton_method(mod, "trytee", trytee, -1);
+
+	/*
+	 * Attempt to move pages instead of copying.  This is only a hint
+	 * and support for it was removed in Linux 2.6.21.  It will be
+         * re-added for FUSE filesystems only in Linux 2.6.35.
+	 */
+	rb_define_const(mod, "F_MOVE", UINT2NUM(SPLICE_F_MOVE));
+
+	/*
+	 * Do not block on pipe I/O.  This flag only affects the pipe(s)
+	 * being spliced from/to and has no effect on the non-pipe
+	 * descriptor (which requires non-blocking operation to be set
+	 * explicitly).
+	 *
+	 * The non-blocking flag (O_NONBLOCK) on the pipe descriptors
+	 * themselves are ignored by this family of functions, and
+	 * using this flag is the only way to get non-blocking operation
+	 * out of them.
+	 *
+	 * It is highly recommended this flag be set
+         * (or SleepyPenguin.trysplice used)
+	 * whenever splicing from a socket into a pipe unless there is
+	 * another (native) thread or process doing a blocking read on that
+	 * pipe.  Otherwise it is possible to block a single-threaded process
+	 * if the socket buffers are larger than the pipe buffers.
+	 */
+	rb_define_const(mod, "F_NONBLOCK", UINT2NUM(SPLICE_F_NONBLOCK));
+
+	/*
+	 * Indicate that there may be more data coming into the outbound
+	 * descriptor.  This can allow the kernel to avoid sending partial
+	 * frames from sockets.  Currently only used with splice.
+	 */
+	rb_define_const(mod, "F_MORE", UINT2NUM(SPLICE_F_MORE));
+
+	/*
+	 * The maximum size of an atomic write to a pipe
+	 * POSIX requires this to be at least 512 bytes.
+	 * Under Linux, this is 4096 bytes.
+	 */
+	rb_define_const(mod, "PIPE_BUF", UINT2NUM(PIPE_BUF));
+
+	/*
+	 * fcntl() command constant used to return the size of a pipe.
+	 * This constant is only defined when running Linux 2.6.35
+	 * or later.  For convenience, use IO#pipe_size instead.
+	 */
+	rb_define_const(mod, "F_GETPIPE_SZ", UINT2NUM(F_GETPIPE_SZ));
+
+	/*
+	 * fcntl() command constant used to set the size of a pipe.
+	 * This constant is only defined when running Linux 2.6.35
+	 * or later.  For convenience, use IO#pipe_size= instead.
+	 */
+	rb_define_const(mod, "F_SETPIPE_SZ", UINT2NUM(F_SETPIPE_SZ));
+
+	sym_EAGAIN = ID2SYM(rb_intern("EAGAIN"));
+}
+#endif
diff --git a/ext/sleepy_penguin/util.c b/ext/sleepy_penguin/util.c
index 2c17e1a..4086b14 100644
--- a/ext/sleepy_penguin/util.c
+++ b/ext/sleepy_penguin/util.c
@@ -118,6 +118,9 @@ int rb_sp_fileno(VALUE io)
 {
 	rb_io_t *fptr;
 
+	if (RB_TYPE_P(io, T_FIXNUM))
+		return FIX2INT(io);
+
 	io = rb_io_get_io(io);
 	GetOpenFile(io, fptr);
 	return FPTR_TO_FD(fptr);
diff --git a/test/test_splice.rb b/test/test_splice.rb
new file mode 100644
index 0000000..475ba8b
--- /dev/null
+++ b/test/test_splice.rb
@@ -0,0 +1,254 @@
+# -*- encoding: binary -*-
+require 'test/unit'
+require 'tempfile'
+require 'socket'
+require 'io/nonblock'
+require 'timeout'
+$-w = true
+require 'sleepy_penguin'
+
+class TestSplice < Test::Unit::TestCase
+
+  def test_splice
+    str = 'abcde'
+    size = 5
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+
+    assert_equal 5, tmp.syswrite(str)
+    tmp.sysseek(0)
+
+    nr = SleepyPenguin.splice(tmp.fileno, nil, wr.fileno, nil, size, 0)
+    assert_equal size, nr
+    assert_equal str, rd.sysread(size)
+  end
+
+  def test_splice_io
+    str = 'abcde'
+    size = 5
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+
+    assert_equal 5, tmp.syswrite(str)
+    tmp.sysseek(0)
+
+    nr = SleepyPenguin.splice(tmp, nil, wr, nil, size, 0)
+    assert_equal size, nr
+    assert_equal str, rd.sysread(size)
+  end
+
+  def test_splice_io_noflags
+    str = 'abcde'
+    size = 5
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+
+    assert_equal 5, tmp.syswrite(str)
+    tmp.sysseek(0)
+
+    nr = SleepyPenguin.splice(tmp, nil, wr, nil, size)
+    assert_equal size, nr
+    assert_equal str, rd.sysread(size)
+  end
+
+  def test_trysplice_io_noflags
+    str = 'abcde'
+    size = 5
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+
+    assert_equal 5, tmp.syswrite(str)
+    tmp.sysseek(0)
+
+    nr = SleepyPenguin.trysplice(tmp, nil, wr, nil, size)
+    assert_equal size, nr
+    assert_equal str, rd.sysread(size)
+  end
+
+  def test_splice_io_ish
+    str = 'abcde'
+    size = 5
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+    io_ish = [ tmp ]
+    def io_ish.to_io
+      first.to_io
+    end
+
+    assert_equal 5, tmp.syswrite(str)
+    tmp.sysseek(0)
+
+    nr = SleepyPenguin.splice(io_ish, nil, wr, nil, size, 0)
+    assert_equal size, nr
+    assert_equal str, rd.sysread(size)
+  end
+
+  def test_splice_in_offset
+    str = 'abcde'
+    off = 3
+    len = 2
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+
+    assert_equal 5, tmp.syswrite(str)
+    tmp.sysseek(0)
+
+    nr = SleepyPenguin.splice(tmp.fileno, off, wr.fileno, nil, len, 0)
+    assert_equal len, nr
+    assert_equal 'de', rd.sysread(len)
+  end
+
+  def test_splice_out_offset
+    str = 'abcde'
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+
+    assert_equal 5, wr.syswrite(str)
+    nr = SleepyPenguin.splice(rd.fileno, nil, tmp.fileno, 3, str.size, 0)
+    assert_equal 5, nr
+    tmp.sysseek(0)
+    assert_equal "\0\0\0abcde", tmp.sysread(9)
+  end
+
+  def test_splice_nonblock
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+
+    assert_raises(Errno::EAGAIN) {
+      SleepyPenguin.splice(rd.fileno, nil, tmp.fileno, 0, 5,
+                           SleepyPenguin::F_NONBLOCK)
+    }
+  end
+
+  def test_trysplice_nonblock
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+    assert_equal :EAGAIN,
+           SleepyPenguin.trysplice(rd, nil, tmp, 0, 5,
+                                   SleepyPenguin::F_NONBLOCK)
+  end
+
+  def test_trysplice_nonblock_noargs
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+    assert_equal :EAGAIN, SleepyPenguin.trysplice(rd, nil, tmp, 0, 5)
+    assert_equal :EAGAIN, SleepyPenguin.trysplice(rd, nil, tmp, 0, 5,
+                                                  SleepyPenguin::F_MORE)
+  end
+
+  def test_splice_eof
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+    wr.syswrite 'abc'
+    wr.close
+
+    nr = SleepyPenguin.splice(rd.fileno, nil, tmp.fileno, 0, 5,
+                              SleepyPenguin::F_NONBLOCK)
+    assert_equal 3, nr
+    assert_raises(EOFError) {
+      SleepyPenguin.splice(rd.fileno, nil, tmp.fileno, 0, 5,
+                           SleepyPenguin::F_NONBLOCK)
+    }
+  end
+
+  def test_trysplice_eof
+    rd, wr = IO.pipe
+    tmp = Tempfile.new('ruby_splice')
+    wr.syswrite 'abc'
+    wr.close
+
+    nr = SleepyPenguin.trysplice(rd, nil, tmp, 0, 5, SleepyPenguin::F_NONBLOCK)
+    assert_equal 3, nr
+    assert_nil SleepyPenguin.trysplice(rd, nil, tmp, 0, 5,
+                                       SleepyPenguin::F_NONBLOCK)
+  end
+
+  def test_splice_nonblock_socket
+    server = TCPServer.new('127.0.0.1', 0)
+    port = server.addr[1]
+    rp, wp = IO.pipe
+    rs = TCPSocket.new('127.0.0.1', port)
+    rs.nonblock = true
+    assert_raises(Errno::EAGAIN) {
+      SleepyPenguin.splice(rs, nil, wp, nil, 1024, 0)
+    }
+    rs.close
+    server.close
+  end
+
+  def test_tee
+    str = 'abcde'
+    size = 5
+    rda, wra = IO.pipe
+    rdb, wrb = IO.pipe
+
+    assert_equal 5, wra.syswrite(str)
+    nr = SleepyPenguin.tee(rda.fileno, wrb.fileno, size, 0)
+    assert_equal 5, nr
+    assert_equal str, rdb.sysread(5)
+    assert_equal str, rda.sysread(5)
+  end
+
+  def test_trytee
+    str = 'abcde'
+    size = 5
+    rda, wra = IO.pipe
+    rdb, wrb = IO.pipe
+
+    assert_equal 5, wra.syswrite(str)
+    nr = SleepyPenguin.trytee(rda, wrb, size, 0)
+    assert_equal 5, nr
+    assert_equal str, rdb.sysread(5)
+    assert_equal str, rda.sysread(5)
+  end
+
+  def test_tee_eof
+    rda, wra = IO.pipe
+    rdb, wrb = IO.pipe
+    wra.close
+    assert_raises(EOFError) {
+      SleepyPenguin.tee(rda.fileno, wrb.fileno, 4096, 0)
+    }
+  end
+
+  def test_trytee_eof
+    rda, wra = IO.pipe
+    rdb, wrb = IO.pipe
+    wra.close
+    assert_nil SleepyPenguin.trytee(rda, wrb, 4096)
+  end
+
+  def test_tee_nonblock
+    rda, wra = IO.pipe
+    rdb, wrb = IO.pipe
+    assert_raises(Errno::EAGAIN) {
+      SleepyPenguin.tee(rda.fileno, wrb.fileno, 4096, SleepyPenguin::F_NONBLOCK)
+    }
+  end
+
+  def test_trytee_nonblock
+    rda, wra = IO.pipe
+    rdb, wrb = IO.pipe
+    assert_equal :EAGAIN, SleepyPenguin.trytee(rda, wrb, 4096)
+  end
+
+  def test_tee_io
+    str = 'abcde'
+    size = 5
+    rda, wra = IO.pipe
+    rdb, wrb = IO.pipe
+
+    assert_equal 5, wra.syswrite(str)
+    nr = SleepyPenguin.tee(rda, wrb, size, 0)
+    assert_equal 5, nr
+    assert_equal str, rdb.sysread(5)
+    assert_equal str, rda.sysread(5)
+  end
+
+  def test_constants
+    assert SleepyPenguin::PIPE_BUF > 0
+    %w(move nonblock more).each { |x|
+      assert Integer === SleepyPenguin.const_get("F_#{x.upcase}")
+    }
+  end
+end
diff --git a/test/test_splice_eintr.rb b/test/test_splice_eintr.rb
new file mode 100644
index 0000000..3a5d96f
--- /dev/null
+++ b/test/test_splice_eintr.rb
@@ -0,0 +1,34 @@
+# -*- encoding: binary -*-
+require 'test/unit'
+require 'tempfile'
+require 'socket'
+require 'sleepy_penguin'
+require 'timeout'
+$-w = true
+Thread.abort_on_exception = true
+
+class Test_Splice_EINTR < Test::Unit::TestCase
+  def setup
+    @usr1 = 0
+    trap(:USR1) { @usr1 += 1 }
+  end
+
+  def teardown
+    trap(:USR1, "DEFAULT")
+  end
+
+  def test_EINTR_splice_read
+    rd, wr = IO.pipe
+    tmp = Tempfile.new 'splice-read'
+    main = Thread.current
+    Thread.new do
+      sleep 0.01
+      Process.kill(:USR1, $$)
+      sleep 0.01
+      wr.write "HI"
+    end
+    nr = SleepyPenguin.splice rd, nil, tmp, nil, 666
+    assert_equal 2, nr
+    assert_equal 1, @usr1
+  end
+end if defined?(RUBY_ENGINE)
-- 
EW


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] implement copy_file_range support for Linux 4.5+
  2016-03-16  3:13 [sleepy_penguin PATCH 0/2] splice/tee/copy_file_range support Eric Wong
  2016-03-16  3:13 ` [PATCH 1/2] support the splice(2) and tee(2) syscalls Eric Wong
@ 2016-03-16  3:13 ` Eric Wong
  1 sibling, 0 replies; 4+ messages in thread
From: Eric Wong @ 2016-03-16  3:13 UTC (permalink / raw)
  To: sleepy-penguin; +Cc: ruby-io-splice, Eric Wong

Under Linux 4.5, this allows for efficient copies and
is similar to the splice and sendfile system calls.
---
 ext/sleepy_penguin/cfr.c      | 68 +++++++++++++++++++++++++++++++++++++++++++
 ext/sleepy_penguin/extconf.rb |  1 +
 ext/sleepy_penguin/init.c     |  2 ++
 ext/sleepy_penguin/sp_copy.h  | 50 +++++++++++++++++++++++++++++++
 ext/sleepy_penguin/splice.c   | 54 ++--------------------------------
 test/test_cfr.rb              | 29 ++++++++++++++++++
 6 files changed, 153 insertions(+), 51 deletions(-)
 create mode 100644 ext/sleepy_penguin/cfr.c
 create mode 100644 ext/sleepy_penguin/sp_copy.h
 create mode 100644 test/test_cfr.rb

diff --git a/ext/sleepy_penguin/cfr.c b/ext/sleepy_penguin/cfr.c
new file mode 100644
index 0000000..ea17f82
--- /dev/null
+++ b/ext/sleepy_penguin/cfr.c
@@ -0,0 +1,68 @@
+#include "sleepy_penguin.h"
+#include "sp_copy.h"
+#include <unistd.h>
+
+#ifndef HAVE_COPY_FILE_RANGE
+#  include <sys/syscall.h>
+#  if !defined(__NR_copy_file_range) && \
+	(defined(__x86_64__) || defined(__i386__))
+#    define __NR_copy_file_range 285
+#  endif /* __NR_copy_file_range */
+#endif
+
+#ifdef __NR_copy_file_range
+static ssize_t my_cfr(int fd_in, off_t *off_in, int fd_out, off_t *off_out,
+		       size_t len, unsigned int flags)
+{
+	long n = syscall(__NR_copy_file_range,
+			fd_in, off_in, fd_out, off_out, len, flags);
+
+	return (ssize_t)n;
+}
+#  define copy_file_range(fd_in,off_in,fd_out,off_out,len,flags) \
+		my_cfr((fd_in),(off_in),(fd_out),(off_out),(len),(flags))
+#endif
+
+static void *nogvl_cfr(void *ptr)
+{
+	struct copy_args *a = ptr;
+
+	return (void *)copy_file_range(a->fd_in, a->off_in,
+				a->fd_out, a->off_out, a->len, a->flags);
+}
+
+static VALUE rb_cfr(int argc, VALUE *argv, VALUE mod)
+{
+	off_t i, o;
+	VALUE io_in, off_in, io_out, off_out, len, flags;
+	ssize_t bytes;
+	struct copy_args a;
+
+	rb_scan_args(argc, argv, "51",
+	             &io_in, &off_in, &io_out, &off_out, &len, &flags);
+
+	a.off_in = NIL_P(off_in) ? NULL : (i = NUM2OFFT(off_in), &i);
+	a.off_out = NIL_P(off_out) ? NULL : (o = NUM2OFFT(off_out), &o);
+	a.len = NUM2SIZET(len);
+	a.flags = NIL_P(flags) ? 0 : NUM2UINT(flags);
+
+again:
+	a.fd_in = rb_sp_fileno(io_in);
+	a.fd_out = rb_sp_fileno(io_out);
+	bytes = (ssize_t)IO_RUN(nogvl_cfr, &a);
+	if (bytes < 0) {
+		if (errno == EINTR)
+			goto again;
+		rb_sys_fail("copy_file_range");
+	} else if (bytes == 0) {
+		rb_eof_error();
+	}
+	return SSIZET2NUM(bytes);
+}
+
+void sleepy_penguin_init_cfr(void)
+{
+	VALUE mod = rb_define_module("SleepyPenguin");
+
+	rb_define_singleton_method(mod, "copy_file_range", rb_cfr, -1);
+}
diff --git a/ext/sleepy_penguin/extconf.rb b/ext/sleepy_penguin/extconf.rb
index 46d1059..53a2810 100644
--- a/ext/sleepy_penguin/extconf.rb
+++ b/ext/sleepy_penguin/extconf.rb
@@ -19,6 +19,7 @@
 end
 have_type('clockid_t', 'time.h')
 have_func('clock_gettime', 'time.h')
+have_func('copy_file_range', 'unistd.h')
 have_func('epoll_create1', %w(sys/epoll.h))
 have_func('inotify_init1', %w(sys/inotify.h))
 have_func('splice', %w(fcntl.h))
diff --git a/ext/sleepy_penguin/init.c b/ext/sleepy_penguin/init.c
index 93e8092..0a1458b 100644
--- a/ext/sleepy_penguin/init.c
+++ b/ext/sleepy_penguin/init.c
@@ -53,6 +53,7 @@ void sleepy_penguin_init_signalfd(void);
 #endif
 
 void sleepy_penguin_init_splice(void);
+void sleepy_penguin_init_cfr(void);
 
 static size_t l1_cache_line_size_detect(void)
 {
@@ -130,4 +131,5 @@ void Init_sleepy_penguin_ext(void)
 	sleepy_penguin_init_inotify();
 	sleepy_penguin_init_signalfd();
 	sleepy_penguin_init_splice();
+	sleepy_penguin_init_cfr();
 }
diff --git a/ext/sleepy_penguin/sp_copy.h b/ext/sleepy_penguin/sp_copy.h
new file mode 100644
index 0000000..83b9554
--- /dev/null
+++ b/ext/sleepy_penguin/sp_copy.h
@@ -0,0 +1,50 @@
+/* common splice and copy_file_range-related definitions */
+
+#ifndef SSIZET2NUM
+#  define SSIZET2NUM(x) LONG2NUM(x)
+#endif
+#ifndef NUM2SIZET
+#  define NUM2SIZET(x) NUM2ULONG(x)
+#endif
+
+#if defined(HAVE_RB_THREAD_CALL_WITHOUT_GVL) && defined(HAVE_RUBY_THREAD_H)
+/* Ruby 2.0+ */
+#  include <ruby/thread.h>
+#  define WITHOUT_GVL(fn,a,ubf,b) \
+        rb_thread_call_without_gvl((fn),(a),(ubf),(b))
+#elif defined(HAVE_RB_THREAD_BLOCKING_REGION)
+typedef VALUE (*my_blocking_fn_t)(void*);
+#  define WITHOUT_GVL(fn,a,ubf,b) \
+	rb_thread_blocking_region((my_blocking_fn_t)(fn),(a),(ubf),(b))
+
+#else /* Ruby 1.8 */
+/* partial emulation of the 1.9 rb_thread_blocking_region under 1.8 */
+#  include <rubysig.h>
+#  define RUBY_UBF_IO ((rb_unblock_function_t *)-1)
+typedef void rb_unblock_function_t(void *);
+typedef void * rb_blocking_function_t(void *);
+static void * WITHOUT_GVL(rb_blocking_function_t *func, void *data1,
+			rb_unblock_function_t *ubf, void *data2)
+{
+	void *rv;
+
+	assert(RUBY_UBF_IO == ubf && "RUBY_UBF_IO required for emulation");
+
+	TRAP_BEG;
+	rv = func(data1);
+	TRAP_END;
+
+	return rv;
+}
+#endif /* ! HAVE_RB_THREAD_BLOCKING_REGION */
+
+#define IO_RUN(fn,data) WITHOUT_GVL((fn),(data),RUBY_UBF_IO,0)
+
+struct copy_args {
+	int fd_in;
+	int fd_out;
+	off_t *off_in;
+	off_t *off_out;
+	size_t len;
+	unsigned flags;
+};
diff --git a/ext/sleepy_penguin/splice.c b/ext/sleepy_penguin/splice.c
index d2f9206..2f901a8 100644
--- a/ext/sleepy_penguin/splice.c
+++ b/ext/sleepy_penguin/splice.c
@@ -1,4 +1,5 @@
 #include "sleepy_penguin.h"
+#include "sp_copy.h"
 #ifdef HAVE_SPLICE
 #include <errno.h>
 #include <fcntl.h>
@@ -18,13 +19,6 @@ static VALUE sym_EAGAIN;
 #  define F_GETPIPE_SZ    (F_LINUX_SPECIFIC_BASE + 8)
 #endif
 
-#ifndef SSIZET2NUM
-#  define SSIZET2NUM(x) LONG2NUM(x)
-#endif
-#ifndef NUM2SIZET
-#  define NUM2SIZET(x) NUM2ULONG(x)
-#endif
-
 static int check_fileno(VALUE io)
 {
 	int saved_errno = errno;
@@ -33,51 +27,9 @@ static int check_fileno(VALUE io)
 	return fd;
 }
 
-#if defined(HAVE_RB_THREAD_CALL_WITHOUT_GVL) && defined(HAVE_RUBY_THREAD_H)
-/* Ruby 2.0+ */
-#  include <ruby/thread.h>
-#  define WITHOUT_GVL(fn,a,ubf,b) \
-        rb_thread_call_without_gvl((fn),(a),(ubf),(b))
-#elif defined(HAVE_RB_THREAD_BLOCKING_REGION)
-typedef VALUE (*my_blocking_fn_t)(void*);
-#  define WITHOUT_GVL(fn,a,ubf,b) \
-	rb_thread_blocking_region((my_blocking_fn_t)(fn),(a),(ubf),(b))
-
-#else /* Ruby 1.8 */
-/* partial emulation of the 1.9 rb_thread_blocking_region under 1.8 */
-#  include <rubysig.h>
-#  define RUBY_UBF_IO ((rb_unblock_function_t *)-1)
-typedef void rb_unblock_function_t(void *);
-typedef void * rb_blocking_function_t(void *);
-static void * WITHOUT_GVL(rb_blocking_function_t *func, void *data1,
-			rb_unblock_function_t *ubf, void *data2)
-{
-	void *rv;
-
-	assert(RUBY_UBF_IO == ubf && "RUBY_UBF_IO required for emulation");
-
-	TRAP_BEG;
-	rv = func(data1);
-	TRAP_END;
-
-	return rv;
-}
-#endif /* ! HAVE_RB_THREAD_BLOCKING_REGION */
-
-#define IO_RUN(fn,data) WITHOUT_GVL((fn),(data),RUBY_UBF_IO,0)
-
-struct splice_args {
-	int fd_in;
-	int fd_out;
-	off_t *off_in;
-	off_t *off_out;
-	size_t len;
-	unsigned flags;
-};
-
 static void *nogvl_splice(void *ptr)
 {
-	struct splice_args *a = ptr;
+	struct copy_args *a = ptr;
 
 	return (void *)splice(a->fd_in, a->off_in, a->fd_out, a->off_out,
 	                     a->len, a->flags);
@@ -87,7 +39,7 @@ static ssize_t do_splice(int argc, VALUE *argv, unsigned dflags)
 {
 	off_t i = 0, o = 0;
 	VALUE io_in, off_in, io_out, off_out, len, flags;
-	struct splice_args a;
+	struct copy_args a;
 	ssize_t bytes;
 	ssize_t total = 0;
 
diff --git a/test/test_cfr.rb b/test/test_cfr.rb
new file mode 100644
index 0000000..3483c5a
--- /dev/null
+++ b/test/test_cfr.rb
@@ -0,0 +1,29 @@
+# -*- encoding: binary -*-
+require 'test/unit'
+require 'tempfile'
+$-w = true
+require 'sleepy_penguin'
+
+class TestCfr < Test::Unit::TestCase
+  def test_copy_file_range
+    str = 'abcde'
+    size = 5
+    src = Tempfile.new('ruby_cfr_src')
+    dst = Tempfile.new('ruby_cfr_dst')
+    assert_equal 5, src.syswrite(str)
+    src.sysseek(0)
+    begin
+      nr = SleepyPenguin.copy_file_range(src, nil, dst, nil, size, 0)
+    rescue Errno::EINVAL
+      warn 'copy_file_range not supported (requires Linux 4.5+)'
+      warn "We have: #{`uname -a`}"
+      return
+    end
+    assert_equal nr, 5
+    dst.sysseek(0)
+    assert_equal str, dst.sysread(5)
+  ensure
+    dst.close!
+    src.close!
+  end
+end if SleepyPenguin.respond_to?(:copy_file_range)
-- 
EW


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] support the splice(2) and tee(2) syscalls
  2016-03-16  3:13 ` [PATCH 1/2] support the splice(2) and tee(2) syscalls Eric Wong
@ 2017-01-02  2:31   ` Eric Wong
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2017-01-02  2:31 UTC (permalink / raw)
  To: sleepy-penguin; +Cc: ruby-io-splice

Eric Wong <e@80x24.org> wrote:
> Since these are Linux-specific syscalls, it makes sense to
> include it here.  This is taken from the "io_splice" RubyGem,
> but this may supercede that.

Note, I've pushed this out, but I'm not happy with the existing
interfaces.  I'll probably switch to a keyword-arguments-driven
approach for optional arguments.

This would require us to drop Ruby 1.9.x (and earlier) support
and require Ruby 2.0 or later, at least for these new
features.

So what's in "master" of git://bogomips.org/sleepy_penguin.git
should not be considered stable.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-01-02  2:31 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-16  3:13 [sleepy_penguin PATCH 0/2] splice/tee/copy_file_range support Eric Wong
2016-03-16  3:13 ` [PATCH 1/2] support the splice(2) and tee(2) syscalls Eric Wong
2017-01-02  2:31   ` Eric Wong
2016-03-16  3:13 ` [PATCH 2/2] implement copy_file_range support for Linux 4.5+ Eric Wong

Code repositories for project(s) associated with this public inbox

	https://yhbt.net/sleepy_penguin.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).