Skip to content

Commit 24d93d4

Browse files
committed
KUDU-3633 shutdown DnsResolver in ServerBase::ShutdownImpl()
The thread pool of the DNS resolver should be shut down along with the messenger in ServerBase to prevent retrying of RPCs that failed as a collateral of the shutdown process in progress. Those RPCs might be retried by invoking rpc::Proxy::RefreshDnsAndEnqueueRequest(), etc. On the related note, I also added a guard to protect ThreadPool::tokens_ in the destructor of the ThreadPool class, as elsewhere. I also snuck in an update to call DCHECK() in a loop only when DCHECK_IS_ON() macro evaluates to 'true'. This addresses flakiness reported at least in one of the RemoteKsckTest scenarios (e.g., TestFilterOnNotabletTable in [1]). One of the related TSAN reports looked like below: RemoteKsckTest.TestFilterOnNotabletTable: WARNING: ThreadSanitizer: data race Read of size 8 at 0x7b54001e5118 by main thread: #0 std::__1::__hash_table<kudu::ThreadPoolToken*, ...>::size() const #1 std::__1::unordered_set<kudu::ThreadPoolToken*, ...>::size() const #2 kudu::ThreadPool::~ThreadPool() ... #6 kudu::kserver::KuduServer::~KuduServer() #7 kudu::tserver::TabletServer::~TabletServer() ... Previous write of size 8 at 0x7b54001e5118 by thread T262 ...: #0 std::__1::__hash_table<kudu::ThreadPoolToken*, ...>::remove(...) ... #4 kudu::ThreadPool::ReleaseToken(...) #5 kudu::ThreadPoolToken::~ThreadPoolToken() ... #24 kudu::consensus::LeaderElection::~LeaderElection() ... #35 kudu::rpc::Proxy::RefreshDnsAndEnqueueRequest(...) ... #41 kudu::DnsResolver::RefreshAddressesAsync() ... Thread T262 'dns-resolver [w' (tid=29102, running) created by thread T182 at: #0 pthread_create #1 kudu::Thread::StartThread(...) #2 kudu::Thread::Create(...) #3 kudu::ThreadPool::CreateThread() #4 kudu::ThreadPool::DoSubmit(..., kudu::ThreadPoolToken*) #5 kudu::ThreadPool::Submit(...) #6 kudu::DnsResolver::RefreshAddressesAsync(..) #7 kudu::rpc::Proxy::RefreshDnsAndEnqueueRequest(...) #8 kudu::rpc::Proxy::AsyncRequest(...) ... #15 kudu::rpc::OutboundCall::CallCallback() #16 kudu::rpc::OutboundCall::SetFailed() #17 kudu::rpc::Connection::Shutdown() #18 kudu::rpc::ReactorThread::ShutdownInternal() ... #25 kudu::rpc::ReactorThread::RunThread() ... [1] http://dist-test.cloudera.org:8080/test_drilldown?test_name=ksck_remote-test Change-Id: I525f1078a349dbd2926938bb4fcc3e80888dfbb4 Reviewed-on: http://gerrit.cloudera.org:8080/22434 Tested-by: Alexey Serbin <alexey@apache.org> Reviewed-by: Abhishek Chennaka <achennaka@cloudera.com> (cherry picked from commit fc40fcd) Reviewed-on: http://gerrit.cloudera.org:8080/22449
1 parent 0f99097 commit 24d93d4

File tree

4 files changed

+26
-6
lines changed

4 files changed

+26
-6
lines changed

src/kudu/server/server_base.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,7 @@ void ServerBase::ShutdownImpl() {
12221222
web_server_->Stop();
12231223
}
12241224
rpc_server_->Shutdown();
1225+
dns_resolver_->Shutdown();
12251226
if (messenger_) {
12261227
messenger_->Shutdown();
12271228
}

src/kudu/util/net/dns_resolver.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ DnsResolver::DnsResolver(int max_threads_num,
6767
}
6868

6969
DnsResolver::~DnsResolver() {
70+
Shutdown();
71+
}
72+
73+
void DnsResolver::Shutdown() {
7074
pool_->Shutdown();
7175
}
7276

@@ -87,7 +91,7 @@ void DnsResolver::ResolveAddressesAsync(const HostPort& hostport,
8791
const auto s = pool_->Submit([=]() {
8892
this->DoResolutionCb(hostport, addresses, cb);
8993
});
90-
if (!s.ok()) {
94+
if (PREDICT_FALSE(!s.ok())) {
9195
cb(s);
9296
}
9397
}
@@ -107,7 +111,7 @@ void DnsResolver::RefreshAddressesAsync(const HostPort& hostport,
107111
}
108112
this->DoResolutionCb(hostport, addresses, cb);
109113
});
110-
if (!s.ok()) {
114+
if (PREDICT_FALSE(!s.ok())) {
111115
cb(s);
112116
}
113117
}

src/kudu/util/net/dns_resolver.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,14 @@ class DnsResolver {
5252
MonoDelta cache_ttl = MonoDelta::FromSeconds(60));
5353
~DnsResolver();
5454

55+
// Shutdown the resolver's thread pool. After shutting down the thread pool,
56+
// RefreshAddressesAsync() returns Status::ServiceUnavailable(),
57+
// while ResolveAddressesAsync() returns cached not-yet-expired entry
58+
// from the cache (if present) or Status::ServiceUnavailable() otherwise.
59+
// The behavior of ResolveAddresses() isn't affected by the status of the
60+
// resolver's thread pool.
61+
void Shutdown();
62+
5563
// Synchronously resolve addresses corresponding to the specified host:port
5664
// pair in 'hostport'. Note that a host may resolve to more than one IP
5765
// address.

src/kudu/util/threadpool.cc

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -387,10 +387,15 @@ ThreadPool::ThreadPool(const ThreadPoolBuilder& builder)
387387
}
388388

389389
ThreadPool::~ThreadPool() {
390-
// There should only be one live token: the one used in tokenless submission.
391-
CHECK_EQ(1, tokens_.size()) << Substitute(
392-
"Threadpool $0 destroyed with $1 allocated tokens",
393-
name_, tokens_.size());
390+
#if DCHECK_IS_ON()
391+
{
392+
// There should only be one live token: the one used in tokenless submission.
393+
std::lock_guard guard(lock_);
394+
DCHECK_EQ(1, tokens_.size()) << Substitute(
395+
"Threadpool $0 destroyed with $1 allocated tokens",
396+
name_, tokens_.size());
397+
}
398+
#endif
394399
Shutdown();
395400
}
396401

@@ -472,11 +477,13 @@ void ThreadPool::Shutdown() {
472477
no_threads_cond_.Wait();
473478
}
474479

480+
#if DCHECK_IS_ON()
475481
// All the threads have exited. Check the state of each token.
476482
for (auto* t : tokens_) {
477483
DCHECK(t->state() == ThreadPoolToken::State::IDLE ||
478484
t->state() == ThreadPoolToken::State::QUIESCED);
479485
}
486+
#endif
480487

481488
// Finally release the queued tasks, outside the lock.
482489
lock.unlock();

0 commit comments

Comments
 (0)