about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2016-06-01 03:06:56 +0000
committerEric Wong <e@80x24.org>2016-06-01 03:07:33 +0000
commita19f6bf70866e9fed34c7220f8a83d8486102821 (patch)
tree7d528c84b5489dd488a82014af755235edcd39f6
parentd413151f5c0e3ccd3c7c7fe9d1db9112e7e83561 (diff)
downloadcmogstored-a19f6bf70866e9fed34c7220f8a83d8486102821.tar.gz
In case "/bin/sh" or "/dev/null" becomes unavailable during the
lifetime of cmogstored, we will no longer crash when attempting
to (re)start iostat.  However, your system is probably hosed
anyways if "/bin/sh" or "/dev/null" become unavailable.

This also fixes a bug where we would leak the iostat pipe if
either fork/vfork fails.

We also close an innocuous race condition where the child
might toggle flags in the parent process and trigger an
extra wakeup.

Finally, we use sigprocmask in the child in case pthread_sigmask
does not not work on some systems after forking.  This is likely
only a cosmetic change.
-rw-r--r--cloexec_from.c4
-rw-r--r--iostat_process.c100
-rw-r--r--svc.c6
-rw-r--r--util.h2
4 files changed, 67 insertions, 45 deletions
diff --git a/cloexec_from.c b/cloexec_from.c
index 444e665..8aa6a92 100644
--- a/cloexec_from.c
+++ b/cloexec_from.c
@@ -5,11 +5,11 @@
 #include "cmogstored.h"
 
 /*
- * this function is only called in a forked child (for iostat)
+ * this function is only called in a vforked child (for iostat)
  * if O_CLOEXEC/SOCK_CLOEXEC is unsupported, or if mog_cloexec_detect()
  * detects those flags are broken.
  */
-void mog_cloexec_from(int lowfd)
+void mog_cloexec_from(int lowfd) /* vfork-safe */
 {
         int fd;
         int last_good = lowfd;
diff --git a/iostat_process.c b/iostat_process.c
index d8766e0..9f98077 100644
--- a/iostat_process.c
+++ b/iostat_process.c
@@ -14,6 +14,12 @@ static time_t iostat_last_fail;
 static struct mog_iostat *iostat;
 static time_t iostat_fail_timeout = 10;
 
+enum mog_exerr {
+        MOG_EXERR_DUP2 = 3,
+        MOG_EXERR_SIGPROCMASK = 4,
+        MOG_EXERR_EXECVE = 5
+};
+
 static void iostat_atexit(void)
 {
         if (iostat_pid > 0)
@@ -40,8 +46,7 @@ static int iostat_pipe_init(int *fds)
         return 0;
 }
 
-/* only called in the child process */
-static const char * exec_cmd(const char *cmd)
+static const char *exec_cmd(const char *cmd)
 {
         time_t last_fail = time(NULL) - iostat_last_fail;
         time_t delay = iostat_fail_timeout - last_fail;
@@ -55,7 +60,7 @@ static const char * exec_cmd(const char *cmd)
         return xasprintf("sleep %d; exec %s", (int)delay, cmd);
 }
 
-static void dup2_or_die(int oldfd, int newfd, const char *errdesc)
+static int dup2_retry(int oldfd, int newfd) /* vfork-safe */
 {
         int rc;
 
@@ -63,57 +68,62 @@ static void dup2_or_die(int oldfd, int newfd, const char *errdesc)
                 rc = dup2(oldfd, newfd);
         while (rc < 0 && (errno == EINTR || errno == EBUSY));
 
-        if (rc < 0) {
-                syslog(LOG_CRIT, "dup2(%s) failed: %m", errdesc);
-                _exit(1);
-        }
+        return rc;
 }
 
-static void preexec_redirect(int out_fd)
+static void execve_iostat(int out_fd, const char *cmd) /* vfork-safe */
 {
-        int null_fd;
-
-        dup2_or_die(out_fd, STDOUT_FILENO, "iostat_pipe[1],STDOUT");
-        mog_close(out_fd);
-
-        null_fd = open("/dev/null", O_RDONLY);
-        if (null_fd < 0) {
-                syslog(LOG_CRIT, "open(/dev/null) failed: %m");
-                abort();
-        }
-        dup2_or_die(null_fd, STDIN_FILENO, "/dev/null,STDIN");
-        mog_close(null_fd);
-
-        /* don't touch stderr */
+        int i;
+        union {
+                char *argv[4];
+                char const *in[4];
+        } u;
+
+        u.in[0] = "/bin/sh";
+        u.in[1] = "-c";
+        u.in[2] = cmd;
+        u.in[3] = 0;
+
+        if (dup2_retry(out_fd, STDOUT_FILENO) < 0)
+                _exit(MOG_EXERR_DUP2);
+        if (!mog_cloexec_atomic)
+                mog_cloexec_from(STDERR_FILENO + 1);
+
+        /* ignore errors, not much we can do about missing signals */
+        for (i = 1; i < NSIG; i++)
+                (void)signal(i, SIG_DFL);
+
+        if (sigprocmask(SIG_SETMASK, &mog_emptyset, NULL) != 0)
+                _exit(MOG_EXERR_SIGPROCMASK);
+
+        execve("/bin/sh", u.argv, environ);
+        _exit(MOG_EXERR_EXECVE);
 }
 
 static pid_t iostat_fork_exec(int out_fd)
 {
         /* rely on /bin/sh to parse iostat command-line args */
         const char *cmd = getenv("MOG_IOSTAT_CMD");
+        int cs;
+
         if (!cmd)
                 cmd = "iostat -dx 1 30";
 
         cmd = exec_cmd(cmd);
-
+        CHECK(int, 0, pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs));
         iostat_pid = mog_fork_for_exec();
-        if (iostat_pid < 0) {
+        if (iostat_pid < 0)
                 syslog(LOG_ERR, "fork() for iostat failed: %m");
-        } else if (iostat_pid > 0) {
+        else if (iostat_pid == 0) /* child */
+                execve_iostat(out_fd, cmd);
+
+        /* parent */
+        CHECK(int, 0, pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, 0));
+        if (iostat_pid > 0)
                 mog_process_register(iostat_pid, MOG_PROC_IOSTAT);
-                mog_close(out_fd);
-        } else {
-                /* child */
-                preexec_redirect(out_fd);
-                if (! mog_cloexec_atomic)
-                        mog_cloexec_from(STDERR_FILENO + 1);
-
-                mog_intr_enable();
-                execl("/bin/sh", "sh", "-c", cmd, (char *)NULL);
-                syslog(LOG_CRIT, "execl(%s) failed: %m", cmd);
-                _exit(1);
-        }
+        mog_close(out_fd);
         mog_free(cmd);
+
         return iostat_pid;
 }
 
@@ -122,8 +132,20 @@ bool mog_iostat_respawn(int oldstatus)
         int fds[2];
         struct mog_fd *mfd;
 
-        if (WIFEXITED(oldstatus) && WEXITSTATUS(oldstatus) == 0) {
-                /* syslog(LOG_DEBUG, "iostat done, restarting"); */
+        if (WIFEXITED(oldstatus)) {
+                int ex = WEXITSTATUS(oldstatus);
+                const char *fn = 0;
+
+                switch (ex) {
+                case 0: break;
+                case MOG_EXERR_DUP2: fn = "dup2"; break;
+                case MOG_EXERR_SIGPROCMASK: fn = "sigprocmask"; break;
+                case MOG_EXERR_EXECVE: fn = "execve"; break;
+                default: fn = "(unknown)";
+                }
+                if (fn)
+                        syslog(LOG_ERR, "iostat exited due to %s failure", fn);
+                /* else syslog(LOG_DEBUG, "iostat done, restarting"); */
         } else {
                 iostat_last_fail = time(NULL);
                 syslog(LOG_WARNING,
diff --git a/svc.c b/svc.c
index a0c2921..43bc356 100644
--- a/svc.c
+++ b/svc.c
@@ -166,14 +166,14 @@ size_t mog_svc_each(Hash_processor processor, void *data)
         return rv;
 }
 
-static bool cloexec_disable(struct mog_fd *mfd)
+static bool cloexec_disable(struct mog_fd *mfd) /* vfork-safe */
 {
         if (mfd)
                 CHECK(int, 0, mog_set_cloexec(mfd->fd, false));
         return true;
 }
 
-static bool svc_cloexec_off_i(void *svcptr, void *unused)
+static bool svc_cloexec_off_i(void *svcptr, void *unused) /* vfork-safe */
 {
         struct mog_svc *svc = svcptr;
 
@@ -186,7 +186,7 @@ static bool svc_cloexec_off_i(void *svcptr, void *unused)
  * Only call this from a freshly forked upgrade child process.
  * This holds no locks to avoid potential deadlocks in post-fork mutexes
  */
-void mog_svc_upgrade_prepare(void)
+void mog_svc_upgrade_prepare(void) /* vfork-safe */
 {
         (void)hash_do_for_each(by_docroot, svc_cloexec_off_i, NULL);
 }
diff --git a/util.h b/util.h
index dda0da5..526eda3 100644
--- a/util.h
+++ b/util.h
@@ -63,7 +63,7 @@ static inline int mog_set_nonblocking(int fd, const bool value)
  * ever get defined, we wouldn't be using them in the first place without
  * updating this code... (no way they'd be on by default).
  */
-static inline int mog_set_cloexec(int fd, const bool set)
+static inline int mog_set_cloexec(int fd, const bool set) /* vfork-safe */
 {
         return fcntl(fd, F_SETFD, set ? FD_CLOEXEC : 0);
 }