diff options
author | Eric Wong <normalperson@yhbt.net> | 2013-10-15 02:30:44 +0000 |
---|---|---|
committer | Eric Wong <normalperson@yhbt.net> | 2013-11-11 07:51:08 +0000 |
commit | ebf312e250ffbb9390df1878a4ad9f3cf106f1f9 (patch) | |
tree | 9340d5999a93c962f33e0238e0c01be24e8e1995 | |
parent | b7bda87ead4a53bb792dbbfb6079aad8cd4170de (diff) | |
download | cmogstored-malloc.tar.gz |
glibc malloc creates arenas aggressively to avoid malloc contention. This is good for CPU-bound multithreaded programs which are malloc-dependent. However cmogstored uses multiple threads for concurrent disk/FS activity and avoids malloc in hot/common paths. Thus malloc should _never_ be a bottleneck for cmogstored. Although physical memory allocation is lazy on Linux kernels, the metadata overhead of the virtually allocated pages can still add up on a system with many disks/devices. I've observed 6-7G VmSize on cmogstored processes with only ~5M VmRSS on machines with many cores/devices and a few hundred clients.
-rw-r--r-- | alloc.c | 27 | ||||
-rw-r--r-- | configure.ac | 4 |
2 files changed, 31 insertions, 0 deletions
```diff
@@ -9,6 +9,10 @@
  * even with normal, untrusted HTTP traffic.
  */
 #include "cmogstored.h"
+#ifdef HAVE_MALLOC_H
+#  include <malloc.h>
+#endif
+
 #define L1_CACHE_LINE_MAX 128 /* largest I've seen (Pentium 4) */
 static size_t l1_cache_line_size = L1_CACHE_LINE_MAX;
 
@@ -39,6 +43,29 @@ void mog_alloc_quit(void)
 
 __attribute__((constructor)) static void alloc_init(void)
 {
+/*
+ * glibc malloc creates arenas aggressively to avoid malloc contention.
+ * This is good for CPU-bound multithreaded programs which are
+ * malloc-dependent. However cmogstored uses multiple threads for
+ * concurrent disk/FS activity and avoids malloc in hot/common paths.
+ * Thus malloc should _never_ be a bottleneck for cmogstored. Although
+ * physical memory allocation is lazy on Linux kernels, the metadata
+ * overhead of the virtually allocated pages can still add up on a
+ * system with many disks/devices.
+ * I've observed 6-7G VmSize on cmogstored processes with only ~5M VmRSS
+ * on machines with many cores/devices and a few hundred clients.
+ */
+#if defined(HAVE_MALLOPT) && defined(M_ARENA_MAX) && defined(M_ARENA_TEST)
+	unsigned long n = num_processors(NPROC_CURRENT);
+	static const unsigned long my_max = 4;
+
+	if (!getenv("MALLOC_ARENA_MAX"))
+		mallopt(M_ARENA_MAX, n > my_max ? my_max : n);
+
+	if (!getenv("MALLOC_ARENA_TEST"))
+		mallopt(M_ARENA_TEST, 1);
+#endif /* glibc malloc tuning */
+
 	l1_cache_line_size_detect();
 	atexit(mog_alloc_quit);
 }
diff --git a/configure.ac b/configure.ac
index 1911f4d..f9b3e7a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -55,6 +55,10 @@ dnl gnulib doesn't actually define SOCK_NONBLOCK/SOCK_CLOEXEC, and
 dnl even if it did, emulation wouldn't be thread-safe
 AC_CHECK_FUNCS([accept4])
 
+dnl needed for glibc malloc tuning
+AC_CHECK_HEADERS([malloc.h])
+AC_CHECK_FUNCS([mallopt])
+
 AC_SUBST(NOSTD_CFLAGS)
 
 # This works for all platforms we care about:
```