about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <normalperson@yhbt.net> 2013-10-15 02:30:44 +0000
committer Eric Wong <normalperson@yhbt.net> 2013-11-11 07:51:08 +0000
commit ebf312e250ffbb9390df1878a4ad9f3cf106f1f9 (patch)
tree 9340d5999a93c962f33e0238e0c01be24e8e1995
parent b7bda87ead4a53bb792dbbfb6079aad8cd4170de (diff)
download cmogstored-malloc.tar.gz
glibc malloc creates arenas aggressively to avoid malloc contention.
This is good for CPU-bound multithreaded programs which are
malloc-dependent.  However, cmogstored uses multiple threads for
concurrent disk/FS activity and avoids malloc in hot/common paths.
Thus malloc should _never_ be a bottleneck for cmogstored.  Although
physical memory allocation is lazy on Linux kernels, the metadata
overhead of the virtually allocated pages can still add up on a
system with many disks/devices.

I've observed 6-7G VmSize on cmogstored processes with only ~5M VmRSS
on machines with many cores/devices and a few hundred clients.
-rw-r--r-- alloc.c 27
-rw-r--r-- configure.ac 4
2 files changed, 31 insertions, 0 deletions
diff --git a/alloc.c b/alloc.c
index ba48ff7..11a7de5 100644
--- a/alloc.c
+++ b/alloc.c
@@ -9,6 +9,10 @@
  * even with normal, untrusted HTTP traffic.
  */
 #include "cmogstored.h"
+#ifdef HAVE_MALLOC_H
+#  include <malloc.h>
+#endif
+
 #define L1_CACHE_LINE_MAX 128 /* largest I've seen (Pentium 4) */
 static size_t l1_cache_line_size = L1_CACHE_LINE_MAX;
 
@@ -39,6 +43,29 @@ void mog_alloc_quit(void)
 
 __attribute__((constructor)) static void alloc_init(void)
 {
+/*
+ * glibc malloc creates arenas aggressively to avoid malloc contention.
+ * This is good for CPU-bound multithreaded programs which are
+ * malloc-dependent.  However cmogstored uses multiple threads for
+ * concurrent disk/FS activity and avoids malloc in hot/common paths.
+ * Thus malloc should _never_ be a bottleneck for cmogstored.  Although
+ * physical memory allocation is lazy on Linux kernels, the metadata
+ * overhead of the virtually allocated pages can still add up on a
+ * system with many disks/devices.
+ * I've observed 6-7G VmSize on cmogstored processes with only ~5M VmRSS
+ * on machines with many cores/devices and a few hundred clients.
+ */
+#if defined(HAVE_MALLOPT) && defined(M_ARENA_MAX) && defined(M_ARENA_TEST)
+        unsigned long n = num_processors(NPROC_CURRENT);
+        static const unsigned long my_max = 4;
+
+        if (!getenv("MALLOC_ARENA_MAX"))
+                mallopt(M_ARENA_MAX, n > my_max ? my_max : n);
+
+        if (!getenv("MALLOC_ARENA_TEST"))
+                mallopt(M_ARENA_TEST, 1);
+#endif /* glibc malloc tuning */
+
         l1_cache_line_size_detect();
         atexit(mog_alloc_quit);
 }
diff --git a/configure.ac b/configure.ac
index 1911f4d..f9b3e7a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -55,6 +55,10 @@ dnl gnulib doesn't actually define SOCK_NONBLOCK/SOCK_CLOEXEC, and
 dnl even if it did, emulation wouldn't be thread-safe
 AC_CHECK_FUNCS([accept4])
 
+dnl needed for glibc malloc tuning
+AC_CHECK_HEADERS([malloc.h])
+AC_CHECK_FUNCS([mallopt])
+
 AC_SUBST(NOSTD_CFLAGS)
 
 # This works for all platforms we care about: