Skip to content

Commit b3edbba

Browse files
committed
mds: QuiesceDbManager: mark next retry event during bootstrap
Fixes: https://tracker.ceph.com/issues/66406 Signed-off-by: Leonid Usov <[email protected]>
1 parent 8b419ce commit b3edbba

File tree

3 files changed

+77
-5
lines changed

3 files changed

+77
-5
lines changed

src/mds/QuiesceDbManager.cc

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,9 @@ void* QuiesceDbManager::quiesce_db_thread_main()
8787
if (next_event_at_age <= db_age) {
8888
break;
8989
}
90+
dout(20) << "db idle, age: " << db_age << " next_event_at_age: " << next_event_at_age << dendl;
9091
auto timeout = std::min(max_wait, next_event_at_age - db_age);
91-
auto wait_result = submit_condition.wait_for(ls, timeout);
92-
if (std::cv_status::timeout == wait_result) {
93-
dout(20) << "db idle, age: " << db_age << dendl;
94-
}
92+
submit_condition.wait_for(ls, timeout);
9593
}
9694

9795
auto [is_member, should_exit] = membership_upkeep();
@@ -111,6 +109,8 @@ void* QuiesceDbManager::quiesce_db_thread_main()
111109
next_event_at_age = leader_upkeep(std::move(acks), std::move(requests));
112110
} else {
113111
// not yet there. Put the acks and requests back onto the queue and wait for updates
112+
// We should mark the next event age in case we get caught up in the sleep above
113+
next_event_at_age = db.get_age() + bootstrap_delay;
114114
ls.lock();
115115
while (!requests.empty()) {
116116
pending_requests.emplace_front(std::move(requests.back()));
@@ -121,6 +121,12 @@ void* QuiesceDbManager::quiesce_db_thread_main()
121121
acks.pop_back();
122122
}
123123
if (pending_db_updates.empty()) {
124+
// we are waiting here because if requests/acks aren't empty
125+
// the code above will skip the sleep due to the `db_thread_has_work`
126+
// returning true, causing a busy-loop of the quiesce manager thread.
127+
// This sleep may be interrupted by the submit_condition, in which case
128+
// we will re-consider everything and may end up here again, but with a shorter
129+
// bootstrap_delay.
124130
dout(5) << "bootstrap: waiting for new peers with pending acks: " << pending_acks.size()
125131
<< " requests: " << pending_requests.size()
126132
<< ". Wait timeout: " << bootstrap_delay << dendl;

src/mds/QuiesceDbManager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ class QuiesceDbManager {
281281
std::unordered_map<RequestContext*, int> done_requests;
282282

283283
void* quiesce_db_thread_main();
284-
bool db_thread_has_work() const;
284+
virtual bool db_thread_has_work() const;
285285

286286
using IsMemberBool = bool;
287287
using ShouldExitBool = bool;

src/test/mds/TestQuiesceDb.cc

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,19 @@ class QuiesceDbTest: public testing::Test {
9191
submit_condition.notify_all();
9292
return ++cluster_membership->epoch;
9393
}
94+
std::atomic<std::optional<bool>> has_work_override;
95+
bool db_thread_has_work() const override {
96+
if (auto has_work = has_work_override.load()) {
97+
return *has_work;
98+
}
99+
return QuiesceDbManager::db_thread_has_work();
100+
}
101+
102+
void spurious_submit_wakeup()
103+
{
104+
std::lock_guard l(submit_mutex);
105+
submit_condition.notify_all();
106+
}
94107
};
95108

96109
epoch_t epoch = 0;
@@ -113,6 +126,16 @@ class QuiesceDbTest: public testing::Test {
113126
return promise.get_future();
114127
}
115128

129+
using ListingHook = std::function<bool(QuiesceInterface::PeerId, QuiesceDbListing&)>;
130+
std::list<std::pair<ListingHook, std::promise<void>>> listing_hooks;
131+
132+
std::future<void> add_listing_hook(ListingHook&& predicate)
133+
{
134+
std::lock_guard l(comms_mutex);
135+
auto&& [_, promise] = listing_hooks.emplace_back(predicate, std::promise<void> {});
136+
return promise.get_future();
137+
}
138+
116139
void SetUp() override {
117140
for (QuiesceInterface::PeerId r = mds_gid_t(1); r < mds_gid_t(11); r++) {
118141
managers[r].reset(new TestQuiesceDbManager());
@@ -153,8 +176,18 @@ class QuiesceDbTest: public testing::Test {
153176
std::unique_lock l(comms_mutex);
154177
if (epoch == this->epoch) {
155178
if (this->managers.contains(recipient)) {
179+
std::queue<std::promise<void>> done_hooks;
156180
dout(10) << "listing from " << me << " (leader=" << leader << ") to " << recipient << " for version " << listing.db_version << " with " << listing.sets.size() << " sets" << dendl;
157181

182+
for (auto it = listing_hooks.begin(); it != listing_hooks.end();) {
183+
if (it->first(recipient, listing)) {
184+
done_hooks.emplace(std::move(it->second));
185+
it = listing_hooks.erase(it);
186+
} else {
187+
it++;
188+
}
189+
}
190+
158191
ceph::bufferlist bl;
159192
encode(listing, bl);
160193
listing.clear();
@@ -163,6 +196,11 @@ class QuiesceDbTest: public testing::Test {
163196

164197
this->managers[recipient]->submit_peer_listing({me, std::move(listing)});
165198
comms_cond.notify_all();
199+
l.unlock();
200+
while (!done_hooks.empty()) {
201+
done_hooks.front().set_value();
202+
done_hooks.pop();
203+
}
166204
return 0;
167205
}
168206
}
@@ -1346,6 +1384,34 @@ TEST_F(QuiesceDbTest, LeaderShutdown)
13461384
}
13471385
}
13481386

1387+
/* ================================================================ */
1388+
TEST_F(QuiesceDbTest, MultiRankBootstrap)
1389+
{
1390+
// create a cluster with a peer that doesn't process messages
1391+
managers.at(mds_gid_t(2))->has_work_override = false;
1392+
ASSERT_NO_FATAL_FAILURE(configure_cluster({ mds_gid_t(1), mds_gid_t(2) }));
1393+
1394+
const QuiesceTimeInterval PEER_DISCOVERY_INTERVAL = std::chrono::milliseconds(1100);
1395+
1396+
// we should be now in the bootstrap loop,
1397+
// which should send discoveries to silent peers
1398+
// once in PEER_DISCOVERY_INTERVAL
1399+
for (int i = 0; i < 5; i++) {
1400+
1401+
if (i > 2) {
1402+
// through a wrench by disrupting the wait sleep in the bootstrap flow
1403+
managers.at(mds_gid_t(1))->spurious_submit_wakeup();
1404+
}
1405+
1406+
// wait for the next peer discovery request
1407+
auto saw_discovery = add_listing_hook([](auto recipient, auto const& listing) {
1408+
return recipient == mds_gid_t(2) && listing.db_version.set_version == 0;
1409+
});
1410+
1411+
EXPECT_EQ(std::future_status::ready, saw_discovery.wait_for(PEER_DISCOVERY_INTERVAL + std::chrono::milliseconds(100)));
1412+
}
1413+
}
1414+
13491415
/* ================================================================ */
13501416
TEST_F(QuiesceDbTest, MultiRankQuiesce)
13511417
{

0 commit comments

Comments
 (0)