about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <normalperson@yhbt.net> 2013-01-30 02:55:29 +0000
committer Eric Wong <normalperson@yhbt.net> 2013-01-31 03:33:04 +0000
commit 088138b235e79fa54a4e3602a4d60975e9581571 (patch)
tree 56e37cf2603bbe552cd84248aeecaefd94fb625d
parent 2b252bb6b4704be01d629194aff588b24d579cdd (diff)
download cmogstored-088138b235e79fa54a4e3602a4d60975e9581571.tar.gz
USR2 now forks a new cmogstored process which inherits
listener file descriptors from the parent.  The parent
renames its pidfile with a ".oldbin" suffix so the new
child can use the new PID file.

Clusters may now upgrade to future versions of cmogstored
without needing to mark hosts down via mogadm.

The behavior of this process should match that of nginx:
http://wiki.nginx.org/CommandLine#Upgrading_To_a_New_Binary_On_The_Fly
-rw-r--r-- Makefile.am 1
-rw-r--r-- cmogstored.c 47
-rw-r--r-- cmogstored.h 6
-rw-r--r-- pidfile.c 251
-rw-r--r-- test/cmogstored-cfg.rb 9
-rw-r--r-- test/ruby.mk 2
-rw-r--r-- test/upgrade.rb 150
-rw-r--r-- upgrade.c 145
8 files changed, 577 insertions, 34 deletions
diff --git a/Makefile.am b/Makefile.am
index 39011aa..09e712f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -83,6 +83,7 @@ mog_src += svc_dev.c
 mog_src += thrpool.c
 mog_src += trywrite.c
 mog_src += util.h
+mog_src += upgrade.c
 mog_src += warn.c
 
 LDADD = $(LIBINTL) $(top_builddir)/lib/libgnu.a libnostd.a
diff --git a/cmogstored.c b/cmogstored.c
index 46cd9b4..e4a0f89 100644
--- a/cmogstored.c
+++ b/cmogstored.c
@@ -13,9 +13,11 @@ const char *argp_program_bug_address = PACKAGE_BUGREPORT;
 const char *argp_program_version = THIS" "PACKAGE_VERSION;
 static sig_atomic_t sigchld_hit;
 static sig_atomic_t do_exit;
+static sig_atomic_t do_upgrade;
 static size_t nthr;
 static bool have_mgmt;
 static pid_t master_pid;
+static pid_t upgrade_pid;
 static unsigned long worker_processes;
 static bool iostat_running;
 
@@ -246,7 +248,8 @@ MOG_NOINLINE static void setup(int argc, char *argv[])
                 setlogmask(mask);
         }
 
-        if (mog_cli.daemonize)
+        /* don't daemonize if we're inheriting FDs, we're already daemonized */
+        if (mog_cli.daemonize && !getenv("CMOGSTORED_FD"))
                 daemonize();
 
         if (pid_fd >= 0 && mog_pidfile_commit(pid_fd) < 0)
@@ -310,6 +313,7 @@ static bool svc_start_each(void *svcptr, void *qptr)
 static void worker_wakeup_handler(int signum)
 {
         switch (signum) {
+        case SIGUSR2: do_upgrade = 1; break;
         case SIGCHLD: sigchld_hit = 1; break;
         case SIGQUIT:
         case SIGTERM:
@@ -327,6 +331,7 @@ static void wakeup_noop(int signum)
 static void master_wakeup_handler(int signum)
 {
         switch (signum) {
+        case SIGUSR2: do_upgrade = 1; break;
         case SIGQUIT:
         case SIGTERM:
         case SIGINT:
@@ -379,7 +384,20 @@ static void sigchld_handler(void)
         }
 }
 
-static void main_loop(const pid_t parent)
+/*
+ * Acts on a pending upgrade request: clears the do_upgrade flag and,
+ * in the master process only, spawns the replacement binary unless an
+ * upgrade child is already tracked in upgrade_pid.
+ */
+static void upgrade_handler(void)
+{
+        do_upgrade = 0;
+        if (upgrade_pid > 0) {
+                /* pid_t has no portable printf conversion, cast for %d */
+                syslog(LOG_INFO, "upgrade already running on PID:%d",
+                       (int)upgrade_pid);
+                return;
+        }
+
+        /* worker processes (if configured) do not upgrade */
+        if (master_pid == getpid())
+                upgrade_pid = mog_upgrade_spawn();
+}
+
+static void main_worker_loop(const pid_t parent)
 {
         mog_cancel_disable(); /* mog_idleq_wait() now relies on this */
         while (parent == 0 || parent == getppid()) {
@@ -396,6 +414,8 @@ static void main_loop(const pid_t parent)
                          * startup, but became usable later
                          */
                         iostat_running = mog_iostat_respawn(0);
+                if (do_upgrade)
+                        upgrade_handler();
         }
 
         syslog(LOG_INFO, "parent=%d abandoned us, dying", parent);
@@ -416,7 +436,7 @@ static void run_worker(const pid_t parent)
                 if (!iostat_running)
                         syslog(LOG_WARNING, "iostat(1) not available/running");
         }
-        main_loop(parent);
+        main_worker_loop(parent);
 }
 
 static void fork_worker(unsigned worker_id)
@@ -480,6 +500,13 @@ static void process_died(pid_t pid, int status)
                        "master process registered iostat process");
                 iostat_died(pid, status);
                 return;
+        case MOG_PROC_UPGRADE:
+                assert(pid == upgrade_pid && "upgrade_pid misplaced");
+                syslog(LOG_INFO, "upgrade PID:%d exited with status=%d",
+                       pid, status);
+                mog_pidfile_upgrade_abort();
+                upgrade_pid = -1;
+                return;
         default:
                 /* could be an inherited iostat if we're using worker+master */
                 name = mog_process_name(id);
@@ -501,8 +528,12 @@ static void run_master(void)
                 fork_worker(id);
 
         while (! do_exit || mog_kill_each_worker(SIGQUIT) > 0) {
-                int status;
-                pid_t pid = waitpid(-1, &status, 0);
+                int status = 0;
+                pid_t pid;
+
+                if (do_upgrade)
+                        upgrade_handler();
+                pid = waitpid(-1, &status, 0);
 
                 if (pid > 0)
                         process_died(pid, status);
@@ -513,11 +544,17 @@ static void run_master(void)
                         default: syslog(LOG_WARNING, "waitpid failed: %m");
                         }
                 }
+                if (do_upgrade)
+                        upgrade_handler();
         }
+        /* upgrade on our way out */
+        if (do_upgrade)
+                upgrade_handler();
 }
 
 int main(int argc, char *argv[], char *envp[])
 {
+        mog_upgrade_prepare(argc, argv, envp);
         /* hack for older gcov + gcc, see nostd/setproctitle.h */
         spt_init(argc, argv, envp);
         set_program_name(argv[0]);
diff --git a/cmogstored.h b/cmogstored.h
index ac18c69..01b1233 100644
--- a/cmogstored.h
+++ b/cmogstored.h
@@ -328,6 +328,8 @@ void * mog_trysend(int fd, void *buf, size_t len, off_t more);
 /* pidfile.c */
 int mog_pidfile_prepare(const char *path);
 int mog_pidfile_commit(int fd);
+bool mog_pidfile_upgrade_prepare(void);
+void mog_pidfile_upgrade_abort(void);
 
 /* svc_dev.c */
 bool mog_svc_devstats_broadcast(void *svc, void *ignored);
@@ -526,3 +528,7 @@ bool mog_process_is_worker(unsigned id);
 size_t mog_kill_each_worker(int signo);
 void mog_process_register(pid_t, unsigned id);
 unsigned mog_process_reaped(pid_t);
+
+/* upgrade.c */
+void mog_upgrade_prepare(int argc, char *argv[], char *envp[]);
+pid_t mog_upgrade_spawn(void);
diff --git a/pidfile.c b/pidfile.c
index a32a26c..2e01c42 100644
--- a/pidfile.c
+++ b/pidfile.c
@@ -4,11 +4,66 @@
  */
 #include "cmogstored.h"
 static const char *pidfile;
+static bool pidfile_exists;
+static const char *old;
+static pid_t owner;
+#ifndef O_CLOEXEC
+#define O_CLOEXEC (0)
+#endif
 
-static void pidfile_atexit(void)
+/*
+ * Probes pid with signal 0.  Returns false for non-positive pids and
+ * when kill() fails with ESRCH; every other outcome (including other
+ * kill() errors) is reported as running.
+ */
+static bool pid_is_running(pid_t pid)
 {
-        unlink(pidfile);
-        mog_free(pidfile);
+        if (pid <= 0)
+                return false;
+        if (kill(pid, 0) < 0 && errno == ESRCH)
+                return false;
+        return true;
+}
+
+/*
+ * Writes the current PID followed by a newline to fd.
+ * Returns true on success; on failure returns false with errno set
+ * (ENOSPC is assumed if the failed write left errno untouched).
+ */
+static bool pid_write(int fd)
+{
+        errno = 0;
+        if (dprintf(fd, "%d\n", (int)getpid()) > 1 && errno != ENOSPC)
+                return true;
+
+        /* callers log with %m, so never report failure with errno == 0 */
+        if (errno == 0)
+                errno = ENOSPC;
+        return false;
+}
+
+/*
+ * Parses the PID stored at the start of fd (decimal digits followed by
+ * a newline) using pread(), so the file offset is not moved.
+ *
+ * Returns 0 if the file is empty, the PID value on success, or -1 on
+ * error with errno set (EINVAL if the contents are not a valid PID).
+ */
+static pid_t pidfile_read(int fd)
+{
+        /* room for every digit of a pid_t plus sign, '\n' and '\0' */
+        char buf[sizeof(pid_t) * 8 / 3 + 3];
+        ssize_t r;
+        char *end;
+        long tmp;
+
+        /* leave one byte free so the buffer can always be terminated */
+        r = pread(fd, buf, sizeof(buf) - 1, 0);
+        if (r < 0)
+                return -1; /* errno set by pread */
+        if (r == 0)
+                return 0; /* empty file */
+
+        /* strtol() requires a NUL-terminated string */
+        buf[r] = '\0';
+        errno = 0;
+        tmp = strtol(buf, &end, 10);
+
+        /* reject garbage, out-of-range values and pid_t truncation */
+        if (end == buf || *end != '\n' || errno == ERANGE ||
+            tmp <= 0 || (long)(pid_t)tmp != tmp) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        return (pid_t)tmp;
+}
+
+/*
+ * Removes the file holding our PID (the .oldbin path if one is
+ * registered, otherwise the pidfile if it exists) and releases both
+ * path strings.  Only the owner process unlinks anything.
+ */
+static void pidfile_cleanup(void)
+{
+        const char *stale;
+
+        if (!pidfile)
+                return;
+
+        /* a registered .oldbin path takes precedence over the pidfile */
+        stale = old ? old : (pidfile_exists ? pidfile : NULL);
+
+        /* don't unlink if it does not belong to us */
+        if (stale && getpid() == owner)
+                unlink(stale);
+
+        mog_free_and_null(&pidfile);
+        mog_free_and_null(&old);
 }
 
 /*
@@ -29,29 +84,29 @@ static void pidfile_atexit(void)
 static int mog_pidfile_open(const char *path, pid_t *cur)
 {
         int fd = open(path, O_RDWR|O_CREAT, 0666);
-        struct stat sb;
-        char buf[sizeof(pid_t) * 8 / 3 + 1];
-        ssize_t r;
+        pid_t pid;
 
-        if (fd < 0) return fd;
-        if (fstat(fd, &sb) < 0) goto err;
+        *cur = -1;
+        if (fd < 0)
+                return fd;
 
-        r = pread(fd, buf, sizeof(buf), 0);
-        if (r < 0) goto err;
-        if (r > 0) {
-                char *end;
-                long tmp = strtol(buf, &end, 10);
-
-                if (*end == '\n' && tmp > 0 && tmp < INT_MAX) {
-                        *cur = tmp;
-                        if (kill(*cur, 0) < 0 && errno == ESRCH) {
-                                *cur = -1;
-                                goto out;
-                        }
-                        errno = EAGAIN;
-                        goto err;
-                }
-        } /* else r == 0 => nothing to kill */
+        /* see if existing pidfile is valid */
+        pid = pidfile_read(fd);
+        if (pid == 0) {
+                /*
+                 * existing pidfile is empty, FS could've been full earlier,
+                 * proceed assuming we can overwrite
+                 */
+        } else if (pid > 0) {
+                /* can't signal it, (likely) safe to overwrite */
+                if (!pid_is_running(pid))
+                        goto out;
+
+                /* old pidfile is still valid */
+                errno = EAGAIN;
+                *cur = pid;
+                goto err;
+        }
 
 out:
         assert(pidfile == NULL && "already opened pidfile for process");
@@ -59,6 +114,7 @@ out:
         if (!pidfile)
                 goto err;
 
+        pidfile_exists = true;
         return fd;
 err:
         PRESERVE_ERRNO( close(fd) );
@@ -77,16 +133,17 @@ int mog_pidfile_commit(int fd)
         assert(pidfile && "mog_pidfile_open not called (or unsuccessful)");
 
         errno = 0;
-        if (dprintf(fd, "%d\n", (int)getpid()) <= 1 || errno == ENOSPC) {
+        if (!pid_write(fd)) {
                 PRESERVE_ERRNO( close(fd) );
                 if (errno == ENOSPC)
-                        PRESERVE_ERRNO( pidfile_atexit() );
+                        PRESERVE_ERRNO( pidfile_cleanup() );
                 return -1;
         }
         if (close(fd) < 0 && errno != EINTR)
                 return -1;
 
-        atexit(pidfile_atexit);
+        owner = getpid();
+        atexit(pidfile_cleanup);
 
         return 0;
 }
@@ -101,6 +158,144 @@ int mog_pidfile_prepare(const char *path)
         if (errno == EAGAIN)
                 die("already running on PID: %d", (int)cur_pid);
         else
-                die_errno("mog_pidfile_open failed");
+                die_errno("mog_pidfile_prepare failed");
         return -1;
 }
+
+/*
+ * Unlinks path unless it names a running process other than ourselves.
+ * Returns true if path is gone afterwards (including when it never
+ * existed), false (after logging) otherwise.
+ */
+static bool unlink_if_owner_or_unused(const char *path)
+{
+        int fd = open(path, O_RDONLY|O_CLOEXEC);
+        pid_t holder;
+
+        if (fd < 0) {
+                /* somebody mistakenly removed path while we were running */
+                if (errno == ENOENT)
+                        return true;
+                syslog(LOG_ERR, "open(%s): %m failed", path);
+                return false;
+        }
+
+        holder = pidfile_read(fd);
+        PRESERVE_ERRNO( mog_close(fd) );
+
+        if (holder < 0) {
+                /* can't unlink pidfile safely */
+                syslog(LOG_ERR, "failed to read/parse %s: %m", path);
+                return false;
+        }
+
+        /*
+         * holder == 0: path is empty, FS could've been full earlier,
+         * proceed assuming we can overwrite.  Otherwise only a live
+         * process other than ourselves blocks the unlink.
+         */
+        if (holder > 0 && holder != getpid() && pid_is_running(holder)) {
+                syslog(LOG_ERR,
+                       "cannot unlink %s belongs to running PID:%d",
+                       path, (int)holder);
+                return false;
+        }
+
+        /* ENOENT: maybe somebody else just unlinked it */
+        if (unlink(path) == 0 || errno == ENOENT)
+                return true;
+
+        syslog(LOG_ERR, "failed to remove %s for upgrade: %m", path);
+        return false;
+}
+
+/*
+ * Replaces (non-atomically) the current pidfile with pidfile.oldbin
+ * holding our PID.  Returns true on success or when no pidfile is in
+ * use; returns false (after logging) on any failure.
+ */
+bool mog_pidfile_upgrade_prepare(void)
+{
+        bool ok = false;
+        pid_t prev;
+        int fd;
+
+        if (!pidfile)
+                return true;
+
+        assert(owner == getpid() &&
+               "mog_pidfile_upgrade_prepare called by non-owner");
+
+        if (!unlink_if_owner_or_unused(pidfile))
+                return false;
+
+        assert(old == NULL && "oldbin already registered");
+        old = xasprintf("%s.oldbin", pidfile);
+        fd = open(old, O_CREAT|O_RDWR|O_CLOEXEC, 0666);
+        if (fd < 0) {
+                syslog(LOG_ERR, "failed to open pidfile %s: %m", old);
+                mog_free_and_null(&old);
+                return false;
+        }
+
+        /* refuse to take over an .oldbin that names a live process */
+        prev = pidfile_read(fd);
+        if (pid_is_running(prev))
+                syslog(LOG_ERR,
+                       "upgrade failed, %s belongs to running PID:%d",
+                       old, (int)prev);
+        else if (pid_write(fd))
+                ok = true;
+        else
+                syslog(LOG_ERR, "failed to write pidfile %s: %m", old);
+
+        /* old stays registered only when our PID was written to it */
+        if (!ok)
+                mog_free_and_null(&old);
+
+        PRESERVE_ERRNO( mog_close(fd) );
+        return ok;
+}
+
+/*
+ * Checks whether pidfile currently names a live process.
+ * Returns true if pidfile cannot be opened or does not name a running
+ * process (so it may be overwritten); returns false, after logging,
+ * if that process is still running.
+ */
+static bool upgrade_failed(void)
+{
+        pid_t pid;
+        int fd = open(pidfile, O_RDONLY|O_CLOEXEC);
+
+        /* pidfile no longer exists, good */
+        if (fd < 0)
+                return true;
+
+        pid = pidfile_read(fd);
+        PRESERVE_ERRNO( mog_close(fd) );
+
+        /* safe to overwrite */
+        if (!pid_is_running(pid))
+                return true;
+
+        assert(old && "we are stuck on oldbin");
+        /* pid_t has no portable printf conversion, cast for %d */
+        syslog(LOG_ERR, "PID:%d of upgrade still running", (int)pid);
+        return false;
+}
+
+/*
+ * Removes the oldbin file and restores the original pidfile with our
+ * PID.  Does nothing when no pidfile is in use, or when the pidfile
+ * still names a running process.
+ */
+void mog_pidfile_upgrade_abort(void)
+{
+        int fd;
+
+        if (!pidfile)
+                return;
+
+        assert(owner == getpid() &&
+               "mog_pidfile_upgrade_abort called by non-owner");
+
+        /* ensure the pidfile of the upgraded process is really invalid */
+        if (!upgrade_failed())
+                return;
+
+        /* old may be unset if nothing was prepared; never open(NULL) */
+        if (old) {
+                if (!unlink_if_owner_or_unused(old))
+                        return;
+                mog_free_and_null(&old);
+        }
+
+        fd = open(pidfile, O_TRUNC|O_CREAT|O_WRONLY|O_CLOEXEC, 0666);
+        if (fd < 0) {
+                /* we're pidless(!) */
+                syslog(LOG_ERR, "failed to open %s for writing: %m", pidfile);
+                pidfile_exists = false;
+                return;
+        }
+
+        pidfile_exists = true;
+        if (!pid_write(fd))
+                syslog(LOG_ERR, "failed to write %s: %m", pidfile);
+        mog_close(fd);
+}
diff --git a/test/cmogstored-cfg.rb b/test/cmogstored-cfg.rb
index 3b30234..ba413a2 100644
--- a/test/cmogstored-cfg.rb
+++ b/test/cmogstored-cfg.rb
@@ -97,6 +97,15 @@ class TestCmogstoredConfig < Test::Unit::TestCase
       end
     end
 
+    # ensure USR2 (upgrade for master) is no-op for children
+    running = children
+    10.times do
+      running.each do |pid|
+        Process.kill(:USR2, pid)
+      end
+      pre_kill # ensure workers are still running
+    end
+
     Process.kill(:QUIT, @pid)
     _, status = Process.waitpid2(@pid)
     assert status.success?, status.inspect
diff --git a/test/ruby.mk b/test/ruby.mk
index a710bab..b3323ca 100644
--- a/test/ruby.mk
+++ b/test/ruby.mk
@@ -1,5 +1,5 @@
 RB_TESTS_FAST = test/cmogstored-cfg.rb test/http_dav.rb test/http_range.rb \
-  test/http_put.rb test/http_getonly.rb test/inherit.rb
+  test/http_put.rb test/http_getonly.rb test/inherit.rb test/upgrade.rb
 RB_TESTS_SLOW = test/mgmt-usage.rb test/mgmt.rb test/mgmt-iostat.rb \
  test/http.rb test/http_put_slow.rb test/http_chunked_put.rb \
  test/graceful_quit.rb test/http_idle_expire.rb
diff --git a/test/upgrade.rb b/test/upgrade.rb
new file mode 100644
index 0000000..643e561
--- /dev/null
+++ b/test/upgrade.rb
@@ -0,0 +1,150 @@
+#!/usr/bin/env ruby
+# -*- encoding: binary -*-
+# Copyright (C) 2012-2013, Eric Wong <normalperson@yhbt.net>
+# License: GPLv3 or later (see COPYING for details)
+require 'test/test_helper'
+require 'net/http'
+require 'timeout'
+
+# Exercises the USR2 binary upgrade: the running daemon moves its
+# pidfile to "<pidfile>.oldbin" and a new process takes over the pidfile.
+class TestUpgrade < Test::Unit::TestCase
+  # Picks free ports and a pidfile path; the listeners are closed again
+  # so the daemon under test can bind to the same ports.
+  def setup
+    @start_pid = $$
+    @tmpdir = Dir.mktmpdir('cmogstored-upgrade-test')
+    @to_close = []
+    @host = TEST_HOST
+    http = TCPServer.new(@host, 0)
+    @http_port = http.addr[1]
+    mgmt = TCPServer.new(@host, 0)
+    @mgmt_port = mgmt.addr[1]
+    @err = Tempfile.new("stderr")
+    pid = Tempfile.new(%w(upgrade .pid))
+    @pid_path = pid.path
+    @to_close << @err
+    @old = "#@pid_path.oldbin"
+  ensure
+    mgmt.close if mgmt
+    http.close if http
+  end
+
+  # Kills any daemon still named by the pidfile, prints captured stderr
+  # and removes temporary files.  Skipped in forked children.
+  def teardown
+    return if $$ != @start_pid
+    if @pid_path && File.exist?(@pid_path)
+      warn "#@pid_path exists"
+      pid = File.read(@pid_path).to_i rescue 0
+      # an empty or unparsable pidfile yields 0; signalling pid 0 would
+      # hit our own process group, so only act on a real PID
+      if pid > 0
+        begin
+          Process.kill(0, pid)
+          warn "Failed to kill #{pid}, Nuking"
+          Process.kill(:KILL, pid)
+          wait_for_death(pid)
+        rescue Errno::ESRCH
+          # stale pidfile, the process is already gone
+        end
+      end
+    end
+    w = File.read(@err.path).strip
+    warn(w) if w.size > 0
+    @to_close.each { |io| io.close unless io.closed? }
+    FileUtils.rm_rf(@tmpdir)
+  end
+
+  # Starts a daemonized cmogstored (optionally with +wp+ worker
+  # processes), sends it USR2 and waits for both pidfiles to appear.
+  # Returns [ old_pid, new_pid ].
+  def upgrade_prepare_full(wp = nil)
+    cmd = [ "cmogstored", "--docroot=#@tmpdir", "--pidfile=#@pid_path",
+            "--daemonize", "--maxconns=500",
+            "--mgmtlisten=#@host:#@mgmt_port",
+            "--httplisten=#@host:#@http_port" ]
+    cmd << "--worker-processes=#{wp}" if wp
+    tmp_pid = fork do
+      $stderr.reopen(@err.path)
+      exec(*cmd)
+    end
+    _, status = Process.waitpid2(tmp_pid)
+    assert status.success?, status.inspect
+
+    assert_http_running
+    old_pid = assert_pidfile_valid(@pid_path)
+
+    # start the upgrade
+    Process.kill(:USR2, old_pid)
+    Timeout.timeout(30) do
+      sleep(0.01) until File.exist?(@old) && File.exist?(@pid_path)
+    end
+
+    # both old and new should be running
+    first_pid = assert_pidfile_valid(@old)
+    assert_equal old_pid, first_pid
+    assert File.exist?(@pid_path)
+    new_pid = assert_pidfile_valid(@pid_path)
+    assert new_pid != old_pid
+    [ old_pid, new_pid ]
+  end
+
+  # Kills the new process with +new_sig+ and expects the old process to
+  # drop the .oldbin file and reclaim the original pidfile.
+  def test_upgrade_kill(new_sig = :QUIT, wp = nil)
+    old_pid, new_pid = upgrade_prepare_full(wp)
+    Process.kill(new_sig, new_pid)
+    wait_for_death(new_pid)
+    Timeout.timeout(30) { sleep(0.01) while File.exist?(@old) }
+    Timeout.timeout(30) { sleep(0.01) until File.exist?(@pid_path) }
+    orig_pid = assert_pidfile_valid(@pid_path)
+    assert_equal old_pid, orig_pid
+    Process.kill(:QUIT, orig_pid)
+    wait_for_death(orig_pid)
+  end
+
+  def test_upgrade_kill_KILL(wp = nil)
+    test_upgrade_kill(:KILL, wp)
+  end
+
+  def test_upgrade_kill_ABRT(wp = nil)
+    test_upgrade_kill(:ABRT, wp)
+  end
+
+  # Quits the old process and expects the new one to keep serving HTTP
+  # until it is told to shut down over the mgmt port.
+  def test_upgrade_normal(wp = nil)
+    old_pid, new_pid = upgrade_prepare_full(wp)
+    Process.kill(:QUIT, old_pid)
+    wait_for_death(old_pid)
+    Process.kill(0, new_pid)
+    assert_http_running
+    mgmt = TCPSocket.new(TEST_HOST, @mgmt_port)
+    mgmt.write "shutdown\n"
+    Timeout.timeout(30) { assert_nil mgmt.gets }
+    wait_for_death(new_pid)
+  end
+
+  def test_upgrade_kill_KILL_worker_process
+    test_upgrade_kill_KILL(1)
+  end
+
+  def test_upgrade_kill_ABRT_worker_process
+    test_upgrade_kill_ABRT(1)
+  end
+
+  def test_upgrade_kill_QUIT_worker_process
+    test_upgrade_kill(:QUIT, 1)
+  end
+
+  def test_upgrade_normal_worker_process
+    test_upgrade_normal(1)
+  end
+
+  # Polls with signal 0 until +pid+ no longer exists (ESRCH) or
+  # +seconds+ elapse, in which case Timeout raises.
+  def wait_for_death(pid, seconds = 30)
+    Timeout.timeout(seconds) do
+      begin
+        Process.kill(0, pid)
+        sleep(0.01)
+      rescue Errno::ESRCH
+        break
+      end while true
+    end
+  end
+
+  def assert_http_running
+    # make sure process is running and signals are ready
+    Net::HTTP.start(@host, @http_port) do |http|
+      req = Net::HTTP::Get.new("/")
+      resp = http.request(req)
+      assert_kind_of Net::HTTPOK, resp
+    end
+  end
+
+  # Reads +path+ and asserts it holds a positive PID, which is returned.
+  def assert_pidfile_valid(path)
+    pid = File.read(path).to_i
+    assert_operator pid, :>, 0
+    pid
+  end
+end
diff --git a/upgrade.c b/upgrade.c
new file mode 100644
index 0000000..3dc7975
--- /dev/null
+++ b/upgrade.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2012-2013, Eric Wong <normalperson@yhbt.net>
+ * License: GPLv3 or later (see COPYING for details)
+ */
+#include "cmogstored.h"
+#include "compat_memstream.h"
+
+/* copies made by mog_upgrade_prepare(), consumed by mog_upgrade_spawn() */
+static struct {
+        char **argv; /* NULL-terminated duplicate of the argv passed in */
+        char **envp; /* NULL-terminated duplicate of the envp passed in */
+} start;
+
+/*
+ * Stores NULL-terminated duplicates of argv (argc entries) and envp in
+ * `start'.  Every string is copied with xstrdup().
+ */
+void mog_upgrade_prepare(int argc, char *argv[], char *envp[])
+{
+        size_t nr_env = 0;
+        size_t n;
+        int a;
+
+        /* argv: argc strings plus the terminating NULL */
+        start.argv = xmalloc(sizeof(char *) * (argc + 1));
+        for (a = 0; a < argc; a++)
+                start.argv[a] = xstrdup(argv[a]);
+        start.argv[argc] = NULL;
+
+        /* envp: count the entries first, then copy the same way */
+        while (envp[nr_env])
+                nr_env++;
+        start.envp = xmalloc(sizeof(char *) * (nr_env + 1));
+        for (n = 0; n < nr_env; n++)
+                start.envp[n] = xstrdup(envp[n]);
+        start.envp[nr_env] = NULL;
+}
+
+/*
+ * Appends "<fd>," to fp.  A negative fd means "unused" and is skipped
+ * successfully.  Returns false (errno set, error logged) if fprintf fails.
+ */
+static bool emit_fd(FILE *fp, int fd)
+{
+        bool ok = true;
+
+        if (fd >= 0) {
+                errno = 0;
+                ok = fprintf(fp, "%d,", fd) > 0;
+                if (!ok) {
+                        /* make sure %m has something to report */
+                        if (errno == 0)
+                                errno = ENOSPC;
+                        syslog(LOG_ERR, "fprintf() failed: %m");
+                }
+        }
+
+        return ok;
+}
+
+/*
+ * mog_svc_each() callback: emits the mgmt, http and httpget fds of one
+ * service to the FILE passed as _fp, stopping at the first failure.
+ */
+static bool svc_emit_fd_i(void *svcptr, void *_fp)
+{
+        struct mog_svc *svc = svcptr;
+        FILE *out = _fp;
+
+        if (!emit_fd(out, svc->mgmt_fd))
+                return false;
+        if (!emit_fd(out, svc->http_fd))
+                return false;
+        return emit_fd(out, svc->httpget_fd);
+}
+
+/*
+ * Calls mog_set_cloexec(fd, false) for an fd that is in use (>= 0).
+ * Always returns true; the CHECK() macro verifies the call returned 0.
+ */
+static bool cloexec_disable(int fd)
+{
+        if (fd < 0)
+                return true;
+
+        CHECK(int, 0, mog_set_cloexec(fd, false));
+        return true;
+}
+
+/*
+ * mog_svc_each() callback: runs cloexec_disable() on the mgmt, http and
+ * httpget fds of one service.  The second argument is ignored.
+ */
+static bool svc_cloexec_off_i(void *svcptr, void *unused)
+{
+        struct mog_svc *svc = svcptr;
+
+        if (!cloexec_disable(svc->mgmt_fd))
+                return false;
+        if (!cloexec_disable(svc->http_fd))
+                return false;
+        return cloexec_disable(svc->httpget_fd);
+}
+
+/*
+ * Forks a child that execs start.argv[0] with the saved environment
+ * plus CMOGSTORED_FD=<comma-separated listener fds>.
+ *
+ * Returns the PID of the newly spawned child, or -1 on failure.  If a
+ * failure happens after mog_pidfile_upgrade_prepare() succeeded, the
+ * pidfile change is undone so a later upgrade can be attempted.
+ */
+pid_t mog_upgrade_spawn(void)
+{
+        pid_t pid = -1;
+        FILE *fp;
+        size_t bytes;
+        char *dst = NULL;
+        int rc;
+
+        /* nothing to undo yet if this fails */
+        if (!mog_pidfile_upgrade_prepare())
+                return pid;
+
+        fp = open_memstream(&dst, &bytes);
+        if (fp == NULL) {
+                syslog(LOG_ERR, "open_memstream failed for upgrade: %m");
+                goto out;
+        }
+
+        errno = 0;
+        rc = fputs("CMOGSTORED_FD=", fp);
+        if (rc < 0 || rc == EOF) {
+                /* ferror() is only a flag, not an errno value */
+                if (errno == 0)
+                        errno = EIO;
+                PRESERVE_ERRNO( (void)fclose(fp) );
+                syslog(LOG_ERR, "fputs returned %d on memstream: %m", rc);
+                goto out;
+        }
+
+        mog_svc_each(svc_emit_fd_i, fp);
+        errno = 0;
+        if ((my_memstream_close(fp, &dst, &bytes) != 0) && (errno != EINTR)) {
+                syslog(LOG_ERR, "fclose on memstream failed for upgrade: %m");
+                goto out;
+        }
+
+        assert(dst[bytes - 1] == ',' && "not comma-terminated no listeners?");
+        dst[bytes - 1] = '\0'; /* kill the last comma */
+
+        pid = fork();
+        if (pid == 0) {
+                char **e = start.envp;
+
+                while (*e)
+                        CHECK(int, 0, putenv(*e++));
+
+                /* CMOGSTORED_FD= is set here */
+                CHECK(int, 0, putenv(dst));
+
+                /* listeners must survive the exec below */
+                mog_svc_each(svc_cloexec_off_i, NULL);
+                mog_intr_enable();
+                execvp(start.argv[0], start.argv);
+                die_errno("execvp %s", start.argv[0]);
+        } else if (pid > 0) {
+                mog_process_register(pid, MOG_PROC_UPGRADE);
+                syslog(LOG_INFO, "upgrade spawned PID:%d", (int)pid);
+        } else {
+                syslog(LOG_ERR, "fork failed for upgrade: %m");
+        }
+
+out:
+        free(dst);
+
+        /*
+         * no child was spawned, so nobody will ever be reaped to trigger
+         * the abort: restore the pidfile ourselves, otherwise it stays
+         * moved to .oldbin and the next upgrade attempt cannot prepare
+         */
+        if (pid < 0)
+                mog_pidfile_upgrade_abort();
+
+        return pid;
+}