about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <normalperson@yhbt.net> 2013-02-15 11:04:55 +0000
committer Eric Wong <normalperson@yhbt.net> 2013-02-16 12:44:38 +0000
commit 13cbdcea65248271668562064aafdcc9634ef9ce (patch)
tree 5277ac1a8682b244d008fe574ffa7a215bb3062a
parent fcb41385271818586a162d02aeb23bc3414a602e (diff)
download cmogstored-13cbdcea65248271668562064aafdcc9634ef9ce.tar.gz
pthread_create may return EAGAIN as a temporary failure;
do not abort a running process if this is the case.

For the initial mountlist scan, we must retry indefinitely for
cmogstored to be usable.  However, with our thread pools, we can
always run with fewer threads than requested (as long as there is
at least one thread per pool).
-rw-r--r-- mnt.c 24
-rw-r--r-- thrpool.c 40
2 files changed, 57 insertions, 7 deletions
diff --git a/mnt.c b/mnt.c
index 57f5e07..09a2030 100644
--- a/mnt.c
+++ b/mnt.c
@@ -143,6 +143,7 @@ static void * init_once(void *ptr)
 /* once-only initialization */
 static void timed_init_once(void)
 {
+        int rc;
         pthread_t thr;
         unsigned long tries;
         struct init_args ia = {
@@ -151,11 +152,25 @@ static void timed_init_once(void)
         };
 
         CHECK(int, 0, pthread_mutex_lock(&ia.cond_lock));
-        CHECK(int, 0, pthread_create(&thr, NULL, init_once, &ia));
 
-        for (tries = 1; ; tries++) {
+        for (tries = 0; ;) {
+                rc = pthread_create(&thr, NULL, init_once, &ia);
+                if (rc == 0)
+                        break;
+
+                /* this must succeed, keep looping */
+                if (rc == EAGAIN) {
+                        if ((++tries % 1024) == 0)
+                                warn("pthread_create: %s (tries: %lu)",
+                                     strerror(rc), tries);
+                        sched_yield();
+                } else {
+                        assert(0 && "pthread_create usage error");
+                }
+        }
+
+        for (tries = 0; ;) {
                 struct timespec ts;
-                int rc;
 
                 gettime(&ts);
                 ts.tv_sec += 5;
@@ -164,7 +179,8 @@ static void timed_init_once(void)
                 if (rc == 0)
                         break;
                 if (rc == ETIMEDOUT)
-                        warn("still populating mountlist (tries: %lu)", tries);
+                        warn("still populating mountlist (tries: %lu)",
+                             ++tries);
                 else if (rc == EINTR)
                         continue;
                 else
diff --git a/thrpool.c b/thrpool.c
index 00c4586..718c568 100644
--- a/thrpool.c
+++ b/thrpool.c
@@ -46,13 +46,38 @@ static void poke(pthread_t thr, int sig)
         assert(err == ESRCH && "pthread_kill() usage bug");
 }
 
+static bool
+thr_create_fail_retry(struct mog_thrpool *tp, size_t size,
+                      unsigned long *nr_eagain, int err)
+{
+        /* do not leave the pool w/o threads at all */
+        if (tp->n_threads == 0) {
+                if ((++*nr_eagain % 1024) == 0) {
+                        errno = err;
+                        syslog(LOG_ERR, "pthread_create: %m (tries: %lu)",
+                               *nr_eagain);
+                }
+                sched_yield();
+                return true;
+        } else {
+                errno = err;
+                syslog(LOG_ERR,
+                       "pthread_create: %m, only running %lu of %lu threads",
+                       (unsigned long)tp->n_threads, (unsigned long)size);
+                return false;
+        }
+}
+
 static void thrpool_set_size(struct mog_thrpool *tp, size_t size)
 {
+        unsigned long nr_eagain = 0;
+
         CHECK(int, 0, pthread_mutex_lock(&tp->lock));
         while (size > tp->n_threads) {
                 pthread_t *thr;
                 pthread_attr_t attr;
                 size_t bytes = (tp->n_threads + 1) * sizeof(pthread_t);
+                int rc;
 
                 tp->threads = xrealloc(tp->threads, bytes);
 
@@ -65,10 +90,18 @@ static void thrpool_set_size(struct mog_thrpool *tp, size_t size)
 
                 thr = tp->threads + tp->n_threads;
 
-                CHECK(int, 0,
-                      pthread_create(thr, &attr, tp->start_fn, tp->start_arg));
+                rc = pthread_create(thr, &attr, tp->start_fn, tp->start_arg);
                 CHECK(int, 0, pthread_attr_destroy(&attr));
-                tp->n_threads++;
+
+                if (rc == 0) {
+                        tp->n_threads++;
+                        nr_eagain = 0;
+                } else if (rc == EAGAIN) {
+                        if (!thr_create_fail_retry(tp, size, &nr_eagain, rc))
+                                goto out;
+                } else {
+                        assert(rc == 0 && "pthread_create usage error");
+                }
         }
 
         if (tp->n_threads > size) {
@@ -95,6 +128,7 @@ static void thrpool_set_size(struct mog_thrpool *tp, size_t size)
                 }
                 tp->n_threads = size;
         }
+out:
         CHECK(int, 0, pthread_mutex_unlock(&tp->lock));
 }