diff options
author | Eric Wong <normalperson@yhbt.net> | 2013-02-23 20:41:33 +0000 |
---|---|---|
committer | Eric Wong <normalperson@yhbt.net> | 2013-02-23 20:43:49 +0000 |
commit | adc750ab6600980ba98d77d371efb07b38886f30 (patch) | |
tree | 104141f710f4aa50e8e587e212074bb982ee07c1 | |
parent | 8757c6458e67e9ab20f9a049a9a68f51b3229816 (diff) | |
download | cmogstored-adc750ab6600980ba98d77d371efb07b38886f30.tar.gz |
Items in the low-priority fsck queue could trigger a assertion failure during graceful shutdown due to improper handling of the MOG_NEXT_IGNORE state in mog_mgmt_quit_step(). However, using the fsck queue in graceful shutdown (which is single-threaded) is probably a bad idea anyways, as the fsck digest could monopolize other requests. So give no special handling to fsck digest queries during graceful shutdown. This only affects users running fsck with checksumming enabled during a graceful shutdown of cmogstored. For checksums users, it is recommended to stop fsck from the trackers and wait for all tracker queues to drain before upgrading cmogstored (and using graceful shutdown on the old cmogstored).
-rw-r--r-- | exit.c | 11 | ||||
-rw-r--r-- | fdmap.c | 3 | ||||
-rw-r--r-- | mgmt.c | 3 | ||||
-rw-r--r-- | mgmt_parser.rl | 13 | ||||
-rw-r--r-- | test/mgmt.rb | 87 |
5 files changed, 74 insertions, 43 deletions
@@ -5,14 +5,17 @@ #include "cmogstored.h" #include "nostd/setproctitle.h" -static void acceptor_quit(int fd) +static void acceptor_quit(int *fdp) { + int fd = *fdp; + if (fd >= 0) { struct mog_fd *mfd = mog_fd_get(fd); struct mog_accept *ac = &mfd->as.accept; mog_thrpool_quit(&ac->thrpool, NULL); mog_fd_put(mfd); + *fdp = -1; } } @@ -20,9 +23,9 @@ static bool svc_quit_accept_i(void *svcptr, void *ignored) { struct mog_svc *svc = svcptr; - acceptor_quit(svc->mgmt_fd); - acceptor_quit(svc->http_fd); - acceptor_quit(svc->httpget_fd); + acceptor_quit(&svc->mgmt_fd); + acceptor_quit(&svc->http_fd); + acceptor_quit(&svc->httpget_fd); return true; } @@ -137,6 +137,9 @@ void mog_fdmap_requeue(struct mog_queue *quit_queue) mfd = aref(fd); switch (mfd->fd_type) { case MOG_FD_TYPE_MGMT: + /* ignore fsck priority in shutdown: */ + mfd->as.mgmt.prio = MOG_PRIO_NONE; + /* fall-through: */ case MOG_FD_TYPE_HTTP: case MOG_FD_TYPE_HTTPGET: mog_activeq_add(quit_queue, mfd); @@ -251,11 +251,14 @@ void mog_mgmt_quit_step(struct mog_fd *mfd) switch (mgmt_queue_step(mfd)) { case MOG_NEXT_WAIT_RD: if (mgmt->forward || mgmt->rbuf) { + /* something is in progress, do not drop it */ mog_idleq_push(q, mfd, MOG_QEV_RD); return; } /* fall-through */ case MOG_NEXT_IGNORE: /* no new iostat watchers during shutdown */ + assert(mgmt->prio == MOG_PRIO_NONE && "bad prio"); + /* fall-through */ case MOG_NEXT_CLOSE: mog_nr_active_at_quit--; mgmt_close(mfd); diff --git a/mgmt_parser.rl b/mgmt_parser.rl index 771d3d5..e183cc3 100644 --- a/mgmt_parser.rl +++ b/mgmt_parser.rl @@ -4,12 +4,23 @@ */ #include "cmogstored.h" #include "mgmt.h" + +/* + * only set fsck prio if we're still accepting connections, graceful + * shutdown in single-threaded mode uses normal (fair) prio + */ +static void set_prio_fsck(struct mog_mgmt *mgmt) +{ + if (mgmt->svc->mgmt_fd >= 0) + mgmt->prio = MOG_PRIO_FSCK; +} + %%{ machine mgmt_parser; eor = '\r'?'\n'; path = "/"[a-zA-Z0-9/\.\-]*; - reason = ' '("fsck" @ { mgmt->prio = MOG_PRIO_FSCK; } | [a-zA-Z0-9_]+); + reason = ' '("fsck" @ { set_prio_fsck(mgmt); } | [a-zA-Z0-9_]+); invalid_line := ( [ \t]* ([^ \t\r]+) > { mgmt->mark[0] = fpc - buf; } diff --git a/test/mgmt.rb b/test/mgmt.rb index e5ed9d9..373cd69 100644 --- a/test/mgmt.rb +++ b/test/mgmt.rb @@ -188,37 +188,14 @@ class TestMgmt < Test::Unit::TestCase end def test_size_huge - Dir.mkdir("#@tmpdir/dev666") big = 2 * 1024 * 1024 * 1024 * 1020 # 2TB - File.open("#@tmpdir/dev666/sparse-file.fid", "w") do |fp| - begin - fp.seek(big - 1) - rescue Errno::EINVAL, Errno::ENOSPC - big /= 2 - warn "trying large file size: #{big}" - retry - end - fp.write('.') - end + sparse_file_prepare(big) t("/dev666/sparse-file.fid #{big}", "size /dev666/sparse-file.fid") rescue Errno::ENOSPC end def test_concurrent_md5_fsck - Dir.mkdir("#@tmpdir/dev666") - big = 1024 * 1024 * 500 # only 500M - big /= 10 if ENV["VALGRIND"] # valgrind slows us down enough :P - File.open("#@tmpdir/dev666/sparse-file.fid", "w") do |fp| - begin - fp.seek(big - 1) - rescue Errno::EINVAL, Errno::ENOSPC - big /= 2 - warn "trying large file size: #{big}" - retry - end - fp.write('.') - end - + sparse_file_prepare threads = (0..5).map do Thread.new do c = get_client @@ -237,19 +214,7 @@ class TestMgmt < Test::Unit::TestCase end def test_concurrent_md5_fsck_pipelined - Dir.mkdir("#@tmpdir/dev666") - big = 1024 * 1024 * 500 # only 500M - big /= 10 if ENV["VALGRIND"] # valgrind slows us down enough :P - File.open("#@tmpdir/dev666/sparse-file.fid", "w") do |fp| - begin - fp.seek(big - 1) - rescue Errno::EINVAL, Errno::ENOSPC - big /= 2 - warn "trying large file size: #{big}" - retry - end - fp.write('.') - end + sparse_file_prepare threads = (0..5).map do Thread.new do @@ -270,6 +235,34 @@ class TestMgmt < Test::Unit::TestCase rescue Errno::ENOSPC end + # ensure aborted requests do not trigger failure in graceful shutdown + def test_concurrent_md5_fsck_abort + sparse_file_prepare + File.open("#@tmpdir/dev666/sparse-file.fid") do |fp| + if fp.respond_to?(:advise) + # clear the cache + fp.advise(:dontneed) + req = "MD5 /dev666/sparse-file.fid fsck\r\n" + starter = get_client + clients = (1..5).map { get_client } + + starter.write(req) + threads = clients.map do |c| + Thread.new(c) do |client| + client.write(req) + client.shutdown + client.close + :ok + end + end + threads.each { |thr| assert_equal :ok, thr.value } + line = starter.gets + assert_match(%r{\A/dev666/sparse-file\.fid MD5=[a-f0-9]{32}\r\n}, line) + starter.close + end + end + end + def test_aio_threads tries = 1000 @client.write "WTF\r\n" @@ -325,4 +318,22 @@ class TestMgmt < Test::Unit::TestCase assert_equal ".\n", @client.gets end if `which iostat 2>/dev/null`.chomp.size != 0 && RUBY_PLATFORM !~ /kfreebsd-gnu/ + + def sparse_file_prepare(big = nil) + Dir.mkdir("#@tmpdir/dev666") + if nil == big + big = 1024 * 1024 * 500 # only 500M + big /= 10 if ENV["VALGRIND"] # valgrind slows us down enough :P + end + File.open("#@tmpdir/dev666/sparse-file.fid", "w") do |fp| + begin + fp.seek(big - 1) + rescue Errno::EINVAL, Errno::ENOSPC + big /= 2 + warn "trying large file size: #{big}" + retry + end + fp.write('.') + end + end end |