Skip to content

Commit f070b4a

Browse files
committed
MB-58609: NIM needs to run in its own thread
Previously we used the "main thead" of the program to accept clients, reconfigure network interfaces etc. This is problematic during shutdown as we terminate the event loop for the main thread and initiate shutdown for the other components. As part of shutting down we need to stop all of the worker threads, but we might have a command currently trying to run network reconfigure. This will hang "forever" as we've stopped running the event dispatch loop. Change-Id: Ieebe65e2b542d9878b9faaf01b832c87cc7df094 Reviewed-on: https://review.couchbase.org/c/kv_engine/+/197098 Tested-by: Build Bot <[email protected]> Reviewed-by: Jim Walker <[email protected]>
1 parent 5e5aa4c commit f070b4a

File tree

5 files changed

+106
-21
lines changed

5 files changed

+106
-21
lines changed

daemon/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ add_library(memcached_daemon STATIC
6464
network_interface_description.h
6565
network_interface_manager.cc
6666
network_interface_manager.h
67+
network_interface_manager_thread.cc
68+
network_interface_manager_thread.h
6769
nobucket_taskable.cc
6870
nobucket_taskable.h
6971
resource_allocation_domain.cc

daemon/memcached.cc

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include "mcaudit.h"
2626
#include "mcbp_executors.h"
2727
#include "network_interface.h"
28-
#include "network_interface_manager.h"
28+
#include "network_interface_manager_thread.h"
2929
#include "nobucket_taskable.h"
3030
#include "protocol/mcbp/engine_wrapper.h"
3131
#include "settings.h"
@@ -1005,12 +1005,8 @@ int memcached_main(int argc, char** argv) {
10051005
}
10061006
#endif
10071007

1008-
LOG_INFO_RAW("Starting network interface manager");
1009-
networkInterfaceManager = std::make_unique<NetworkInterfaceManager>(
1010-
*main_base, prometheus_auth_callback);
1011-
networkInterfaceManager->createBootstrapInterface();
1012-
1013-
/* start up worker threads if MT mode */
1008+
auto nim_thread = std::make_unique<NetworkInterfaceManagerThread>(
1009+
prometheus_auth_callback);
10141010
worker_threads_init();
10151011

10161012
LOG_INFO(R"(Starting Phosphor tracing with config: "{}")",
@@ -1051,9 +1047,10 @@ int memcached_main(int argc, char** argv) {
10511047
}
10521048
}
10531049

1050+
LOG_INFO_RAW("Initialization complete. Accepting clients.");
1051+
nim_thread->start();
1052+
10541053
if (!memcached_shutdown) {
1055-
/* enter the event loop */
1056-
LOG_INFO_RAW("Initialization complete. Accepting clients.");
10571054
main_base->loopForever();
10581055
}
10591056

@@ -1073,8 +1070,10 @@ int memcached_main(int argc, char** argv) {
10731070
LOG_INFO_RAW("Shutting down client worker threads");
10741071
threads_shutdown();
10751072

1076-
LOG_INFO_RAW("Releasing server sockets");
1077-
networkInterfaceManager.reset();
1073+
LOG_INFO_RAW("Shutting down network interface manager thread");
1074+
nim_thread->shutdown();
1075+
nim_thread->waitForState(Couchbase::ThreadState::Zombie);
1076+
nim_thread.reset();
10781077

10791078
LOG_INFO_RAW("Releasing bucket resources");
10801079
cleanup_buckets();
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Copyright 2023-Present Couchbase, Inc.
3+
*
4+
* Use of this software is governed by the Business Source License included
5+
* in the file licenses/BSL-Couchbase.txt. As of the Change Date specified
6+
* in that file, in accordance with the Business Source License, use of this
7+
* software will be governed by the Apache License, Version 2.0, included in
8+
* the file licenses/APL2.txt.
9+
*/
10+
#include "network_interface_manager_thread.h"
11+
#include "network_interface_manager.h"
12+
13+
#include "log_macros.h"
14+
15+
NetworkInterfaceManagerThread::NetworkInterfaceManagerThread(
16+
cb::prometheus::AuthCallback authCB)
17+
: Couchbase::Thread("mcd:nim"), base(std::make_unique<folly::EventBase>()) {
18+
LOG_INFO_RAW("Starting network interface manager");
19+
networkInterfaceManager =
20+
std::make_unique<NetworkInterfaceManager>(*base, std::move(authCB));
21+
networkInterfaceManager->createBootstrapInterface();
22+
}
23+
24+
void NetworkInterfaceManagerThread::run() {
25+
setRunning();
26+
try {
27+
base->loopForever();
28+
} catch (const std::exception& exception) {
29+
LOG_ERROR(
30+
"NetworkInterfaceManagerThread::run(): received exception: {}",
31+
exception.what());
32+
}
33+
34+
LOG_INFO_RAW("Releasing server sockets");
35+
networkInterfaceManager.reset();
36+
LOG_INFO_RAW("Network interface manager thread stopped");
37+
}
38+
39+
void NetworkInterfaceManagerThread::shutdown() {
40+
base->terminateLoopSoon();
41+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright 2023-Present Couchbase, Inc.
3+
*
4+
* Use of this software is governed by the Business Source License included
5+
* in the file licenses/BSL-Couchbase.txt. As of the Change Date specified
6+
* in that file, in accordance with the Business Source License, use of this
7+
* software will be governed by the Apache License, Version 2.0, included in
8+
* the file licenses/APL2.txt.
9+
*/
10+
11+
#pragma once
12+
13+
#include <folly/io/async/EventBase.h>
14+
#include <platform/thread.h>
15+
#include <statistics/prometheus.h>
16+
#include <memory>
17+
18+
/**
19+
* The NetworkInterfaceManagerThread class runs all of the code
20+
* in the network manager (accept clients, reconfigure interface
21+
* etc). It needs to be run in its own thread as we clients may
22+
* be performing an operation which touch this code during
23+
* shutdown where the "main thread" has stopped the event loop
24+
* and is stopping other components.
25+
*/
26+
class NetworkInterfaceManagerThread : public Couchbase::Thread {
27+
public:
28+
/**
29+
* Create a new thread which use the provided callback for
30+
* prometheus authentication
31+
*
32+
* @param authCB callback performing authentication
33+
*/
34+
NetworkInterfaceManagerThread(cb::prometheus::AuthCallback authCB);
35+
NetworkInterfaceManagerThread() = delete;
36+
NetworkInterfaceManagerThread(const NetworkInterfaceManagerThread&) =
37+
delete;
38+
39+
/// Request the thread to stop
40+
void shutdown();
41+
42+
protected:
43+
/// The main loop of the thread
44+
void run() override;
45+
46+
/// The event base used for this thread
47+
std::unique_ptr<folly::EventBase> base;
48+
};

daemon/server_socket.cc

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -109,16 +109,6 @@ void LibeventServerSocketImpl::listen_event_handler(evutil_socket_t,
109109
short,
110110
void* arg) {
111111
auto& c = *reinterpret_cast<LibeventServerSocketImpl*>(arg);
112-
113-
if (is_memcached_shutting_down()) {
114-
// Someone requested memcached to shut down. The listen thread should
115-
// be stopped immediately to avoid new connections
116-
LOG_INFO_RAW("Stopping listen thread");
117-
event_del(c.ev.get());
118-
c.ev.reset();
119-
return;
120-
}
121-
122112
try {
123113
c.acceptNewClient();
124114
} catch (std::invalid_argument& e) {
@@ -225,6 +215,11 @@ void LibeventServerSocketImpl::acceptNewClient() {
225215
return;
226216
}
227217

218+
if (is_memcached_shutting_down()) {
219+
safe_close(client);
220+
return;
221+
}
222+
228223
if (interface->tls && !tls_configured) {
229224
tls_configured = networkInterfaceManager->isTlsConfigured();
230225
if (!tls_configured) {

0 commit comments

Comments
 (0)