From ebf312e250ffbb9390df1878a4ad9f3cf106f1f9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Oct 2013 02:30:44 +0000 Subject: default malloc tuning for glibc glibc malloc creates arenas aggressively to avoid malloc contention. This is good for CPU-bound multithreaded programs which are malloc-dependent. However cmogstored uses multiple threads for concurrent disk/FS activity and avoids malloc in hot/common paths. Thus malloc should _never_ be a bottleneck for cmogstored. Although physical memory allocation is lazy on Linux kernels, the metadata overhead of the virtually allocated pages can still add up on a system with many disks/devices. I've observed 6-7G VmSize on cmogstored processes with only ~5M VmRSS on machines with many cores/devices and a few hundred clients. --- alloc.c | 27 +++++++++++++++++++++++++++ configure.ac | 4 ++++ 2 files changed, 31 insertions(+) diff --git a/alloc.c b/alloc.c index ba48ff7..11a7de5 100644 --- a/alloc.c +++ b/alloc.c @@ -9,6 +9,10 @@ * even with normal, untrusted HTTP traffic. */ #include "cmogstored.h" +#ifdef HAVE_MALLOC_H +# include <malloc.h> +#endif + #define L1_CACHE_LINE_MAX 128 /* largest I've seen (Pentium 4) */ static size_t l1_cache_line_size = L1_CACHE_LINE_MAX; @@ -39,6 +43,29 @@ void mog_alloc_quit(void) __attribute__((constructor)) static void alloc_init(void) { +/* + * glibc malloc creates arenas aggressively to avoid malloc contention. + * This is good for CPU-bound multithreaded programs which are + * malloc-dependent. However cmogstored uses multiple threads for + * concurrent disk/FS activity and avoids malloc in hot/common paths. + * Thus malloc should _never_ be a bottleneck for cmogstored. Although + * physical memory allocation is lazy on Linux kernels, the metadata + * overhead of the virtually allocated pages can still add up on a + * system with many disks/devices. + * I've observed 6-7G VmSize on cmogstored processes with only ~5M VmRSS + * on machines with many cores/devices and a few hundred clients.
+ */ +#if defined(HAVE_MALLOPT) && defined(M_ARENA_MAX) && defined(M_ARENA_TEST) + unsigned long n = num_processors(NPROC_CURRENT); + static const unsigned long my_max = 4; + + if (!getenv("MALLOC_ARENA_MAX")) + mallopt(M_ARENA_MAX, n > my_max ? my_max : n); + + if (!getenv("MALLOC_ARENA_TEST")) + mallopt(M_ARENA_TEST, 1); +#endif /* glibc malloc tuning */ + l1_cache_line_size_detect(); atexit(mog_alloc_quit); } diff --git a/configure.ac b/configure.ac index 1911f4d..f9b3e7a 100644 --- a/configure.ac +++ b/configure.ac @@ -55,6 +55,10 @@ dnl gnulib doesn't actually define SOCK_NONBLOCK/SOCK_CLOEXEC, and dnl even if it did, emulation wouldn't be thread-safe AC_CHECK_FUNCS([accept4]) +dnl needed for glibc malloc tuning +AC_CHECK_HEADERS([malloc.h]) +AC_CHECK_FUNCS([mallopt]) + AC_SUBST(NOSTD_CFLAGS) # This works for all platforms we care about: -- cgit v1.2.3-24-ge0c7