Skip to content

Commit d3e0ca7

Browse files
committed
crimson/mgr/client:Introduce Client::send()
Client::reconnect nullifies the connection used by the mgr client before setting a new one. In this time, we might re-use the nullptr connection due to tasks that are being run in the background (See: dispatch_in_background). To avoid this, we had multiple `if (!conn)` checks, some methods even checked this condition twice to reduce the possibilty of using undefined the connection. Instead of introducing an additional check in Client::_send_report, Introduce Client::send which would be responsible for: a) Veryfing the connection is set b) Trying to get a shared access to conn_lock Client::reconnect will lock conn_lock exclusivly until the connection is set. If we send is called while reconnecting, sending will be dropped - same as before. Fixes: https://tracker.ceph.com/issues/70179 Signed-off-by: Matan Breizman <[email protected]>
1 parent 6e23a1e commit d3e0ca7

File tree

2 files changed

+32
-17
lines changed

2 files changed

+32
-17
lines changed

src/crimson/mgr/client.cc

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "client.h"
55

66
#include <seastar/core/sleep.hh>
7+
#include <seastar/util/defer.hh>
78

89
#include "crimson/common/log.h"
910
#include "crimson/net/Connection.h"
@@ -51,6 +52,25 @@ seastar::future<> Client::stop()
5152
co_await gates.close_all();
5253
}
5354

55+
seastar::future<> Client::send(MessageURef msg)
56+
{
57+
LOG_PREFIX(Client::send);
58+
DEBUGDPP("{}", *this, *msg);
59+
if (!conn_lock.try_lock_shared()) {
60+
WARNDPP("ongoing reconnect, report skipped", *this, *msg);
61+
co_return;
62+
}
63+
auto unlocker = seastar::defer([this] {
64+
conn_lock.unlock_shared();
65+
});
66+
if (!conn) {
67+
WARNDPP("no conn available, report skipped", *this, *msg);
68+
co_return;
69+
}
70+
DEBUGDPP("sending {}", *this, *msg);
71+
co_await conn->send(std::move(msg));
72+
}
73+
5474
std::optional<seastar::future<>>
5575
Client::ms_dispatch(crimson::net::ConnectionRef conn, MessageRef m)
5676
{
@@ -89,7 +109,7 @@ void Client::ms_handle_connect(
89109
m->daemon_name = local_conf()->name.get_id();
90110
local_conf().get_config_bl(0, &m->config_bl, &last_config_bl_version);
91111
local_conf().get_defaults_bl(&m->config_defaults_bl);
92-
return conn->send(std::move(m));
112+
return send(std::move(m));
93113
} else {
94114
DEBUGDPP("connection changed", *this);
95115
return seastar::now();
@@ -117,6 +137,10 @@ seastar::future<> Client::reconnect()
117137
{
118138
LOG_PREFIX(Client::reconnect);
119139
DEBUGDPP("", *this);
140+
co_await conn_lock.lock();
141+
auto unlocker = seastar::defer([this] {
142+
conn_lock.unlock();
143+
});
120144
if (conn) {
121145
DEBUGDPP("marking down", *this);
122146
conn->mark_down();
@@ -185,17 +209,9 @@ void Client::report()
185209
_send_report();
186210
gates.dispatch_in_background(__func__, *this, [this, FNAME] {
187211
DEBUGDPP("dispatching in background", *this);
188-
if (!conn) {
189-
WARNDPP("no conn available; report skipped", *this);
190-
return seastar::now();
191-
}
192212
return with_stats.get_stats(
193-
).then([this, FNAME](auto &&pg_stats) {
194-
if (!conn) {
195-
WARNDPP("no conn available; before sending stats, report skipped", *this);
196-
return seastar::now();
197-
}
198-
return conn->send(std::move(pg_stats));
213+
).then([this](auto &&pg_stats) {
214+
return send(std::move(pg_stats));
199215
});
200216
});
201217
}
@@ -211,10 +227,6 @@ void Client::_send_report()
211227
DEBUGDPP("", *this);
212228
gates.dispatch_in_background(__func__, *this, [this, FNAME] {
213229
DEBUGDPP("dispatching in background", *this);
214-
if (!conn) {
215-
WARNDPP("cannot send report; no conn available", *this);
216-
return seastar::now();
217-
}
218230
auto report = make_message<MMgrReport>();
219231
// Adding empty information since we don't support perfcounters yet
220232
report->undeclare_types.emplace_back();
@@ -235,10 +247,10 @@ void Client::_send_report()
235247
return get_perf_report_cb(
236248
).then([report=std::move(report), this](auto payload) mutable {
237249
report->metric_report_message = MetricReportMessage(std::move(payload));
238-
return conn->send(std::move(report));
250+
return send(std::move(report));
239251
});
240252
}
241-
return conn->send(std::move(report));
253+
return send(std::move(report));
242254
});
243255
}
244256

src/crimson/mgr/client.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#pragma once
55

66
#include <seastar/core/timer.hh>
7+
#include <seastar/core/shared_mutex.hh>
78

89
#include "crimson/common/gated.h"
910
#include "crimson/net/Dispatcher.h"
@@ -41,6 +42,7 @@ class Client : public crimson::net::Dispatcher {
4142
get_perf_report_cb_t cb_get);
4243
seastar::future<> start();
4344
seastar::future<> stop();
45+
seastar::future<> send(MessageURef msg);
4446
void report();
4547
void update_daemon_health(std::vector<DaemonHealthMetric>&& metrics);
4648

@@ -62,6 +64,7 @@ class Client : public crimson::net::Dispatcher {
6264
crimson::net::Messenger& msgr;
6365
WithStats& with_stats;
6466
crimson::net::ConnectionRef conn;
67+
seastar::shared_mutex conn_lock;
6568
seastar::timer<seastar::lowres_clock> report_timer;
6669
crimson::common::gate_per_shard gates;
6770
uint64_t last_config_bl_version = 0;

0 commit comments

Comments
 (0)