Skip to content

Commit 9969477

Browse files
committed
Merge PR ceph#57552 into main
* refs/pull/57552/head:
    mds/quiesce-db: collect acks while bootstrapping
Reviewed-by: Patrick Donnelly <[email protected]>
2 parents 9d3f590 + d6fb875 commit 9969477

File tree

4 files changed: 37 additions and 14 deletions.

src/mds/QuiesceDb.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,17 @@ operator<<(std::basic_ostream<CharT, Traits>& os, const QuiesceMap& map)
681681
struct QuiesceDbPeerAck {
682682
QuiesceInterface::PeerId origin;
683683
QuiesceMap diff_map;
684+
685+
QuiesceDbPeerAck() = default;
686+
QuiesceDbPeerAck(QuiesceDbPeerAck const&) = default;
687+
QuiesceDbPeerAck(QuiesceDbPeerAck &&) = default;
688+
QuiesceDbPeerAck(QuiesceInterface::PeerId origin, std::convertible_to<QuiesceMap> auto&& diff_map)
689+
: origin(origin)
690+
, diff_map(std::forward<QuiesceMap>(diff_map))
691+
{}
692+
693+
QuiesceDbPeerAck& operator=(QuiesceDbPeerAck const&) = default;
694+
QuiesceDbPeerAck& operator=(QuiesceDbPeerAck&&) = default;
684695
};
685696

686697
template <class CharT, class Traits>

src/mds/QuiesceDbManager.cc

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,20 @@ void* QuiesceDbManager::quiesce_db_thread_main()
110110
// we're good to process things
111111
next_event_at_age = leader_upkeep(std::move(acks), std::move(requests));
112112
} else {
113-
// not yet there. Put the requests back onto the queue and wait for updates
113+
// not yet there. Put the acks and requests back onto the queue and wait for updates
114114
ls.lock();
115115
while (!requests.empty()) {
116116
pending_requests.emplace_front(std::move(requests.back()));
117117
requests.pop_back();
118118
}
119+
while (!acks.empty()) {
120+
pending_acks.emplace_front(std::move(acks.back()));
121+
acks.pop_back();
122+
}
119123
if (pending_db_updates.empty()) {
120-
dout(5) << "bootstrap: waiting for peer updates with timeout " << bootstrap_delay << dendl;
124+
dout(5) << "bootstrap: waiting for new peers with pending acks: " << pending_acks.size()
125+
<< " requests: " << pending_requests.size()
126+
<< ". Wait timeout: " << bootstrap_delay << dendl;
121127
submit_condition.wait_for(ls, bootstrap_delay);
122128
}
123129
continue;
@@ -401,7 +407,7 @@ QuiesceTimeInterval QuiesceDbManager::leader_upkeep(decltype(pending_acks)&& ack
401407
while (!acks.empty()) {
402408
auto& [from, diff_map] = acks.front();
403409
leader_record_ack(from, std::move(diff_map));
404-
acks.pop();
410+
acks.pop_front();
405411
}
406412

407413
// process requests
@@ -439,7 +445,9 @@ void QuiesceDbManager::complete_requests() {
439445
}
440446
}
441447
}
448+
442449
// non-zero result codes are all errors
450+
dout(10) << "completing request '" << req->request << " with rc: " << -res << dendl;
443451
req->complete(-res);
444452
}
445453
done_requests.clear();
@@ -940,16 +948,20 @@ size_t QuiesceDbManager::check_peer_reports(const QuiesceSetId& set_id, const Qu
940948
continue;
941949
}
942950
reported_state = pr_state.state;
943-
reporting_peers.insert({pr_state.state, {peer, info.diff_map.db_version}});
944951
}
945952

946953
// but we only consider the peer up to date given the version
947954
if (info.diff_map.db_version >= QuiesceDbVersion { membership.epoch, set.version }) {
948955
up_to_date_peers++;
949956
}
950957

951-
min_reported_state = std::min(min_reported_state, reported_state);
952-
max_reported_state = std::max(max_reported_state, reported_state);
958+
// we keep track of reported states only if the peer actually said something
959+
// even if for an older version
960+
if (info.diff_map.db_version.set_version > 0) {
961+
reporting_peers.insert({ reported_state, { peer, info.diff_map.db_version } });
962+
min_reported_state = std::min(min_reported_state, reported_state);
963+
max_reported_state = std::max(max_reported_state, reported_state);
964+
}
953965
}
954966

955967
if (min_reported_state == QS__MAX) {
@@ -1099,15 +1111,16 @@ QuiesceTimeInterval QuiesceDbManager::leader_upkeep_awaits()
10991111
for (auto it = awaits.begin(); it != awaits.end();) {
11001112
auto & [set_id, actx] = *it;
11011113
Db::Sets::const_iterator set_it = db.sets.find(set_id);
1114+
QuiesceState set_state = QS__INVALID;
11021115

11031116
int rc = db.get_age() >= actx.expire_at_age ? EINPROGRESS : EBUSY;
11041117

11051118
if (set_it == db.sets.cend()) {
11061119
rc = ENOENT;
11071120
} else {
1108-
auto const & set = set_it->second;
1109-
1110-
switch(set.rstate.state) {
1121+
auto const& set = set_it->second;
1122+
set_state = set.rstate.state;
1123+
switch(set_state) {
11111124
case QS_CANCELED:
11121125
rc = ECANCELED;
11131126
break;
@@ -1140,7 +1153,6 @@ QuiesceTimeInterval QuiesceDbManager::leader_upkeep_awaits()
11401153
}
11411154

11421155
if (rc != EBUSY) {
1143-
dout(10) << "completing an await for the set '" << set_id << "' with rc: " << rc << dendl;
11441156
done_requests[actx.req_ctx] = rc;
11451157
it = awaits.erase(it);
11461158
} else {

src/mds/QuiesceDbManager.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ class QuiesceDbManager {
100100
return -ESTALE;
101101
}
102102

103-
pending_acks.push(std::move(ack));
103+
pending_acks.emplace_back(std::move(ack));
104104
submit_condition.notify_all();
105105
return 0;
106106
}
@@ -138,7 +138,7 @@ class QuiesceDbManager {
138138

139139
if (cluster_membership->leader == cluster_membership->me) {
140140
// local delivery
141-
pending_acks.push({ cluster_membership->me, std::move(diff_map) });
141+
pending_acks.emplace_back(cluster_membership->me, std::move(diff_map));
142142
submit_condition.notify_all();
143143
} else {
144144
// send to the leader outside of the lock
@@ -201,7 +201,7 @@ class QuiesceDbManager {
201201
std::optional<AgentCallback> agent_callback;
202202
std::optional<QuiesceClusterMembership> cluster_membership;
203203
std::queue<QuiesceDbPeerListing> pending_db_updates;
204-
std::queue<QuiesceDbPeerAck> pending_acks;
204+
std::deque<QuiesceDbPeerAck> pending_acks;
205205
std::deque<RequestContext*> pending_requests;
206206
bool db_thread_should_exit = false;
207207
bool db_thread_should_clear_db = true;

src/test/mds/TestQuiesceDb.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -984,7 +984,7 @@ TEST_F(QuiesceDbTest, InterruptedQuiesceAwait)
984984
EXPECT_EQ(ERR(ETIMEDOUT), await2.wait_result());
985985

986986
// shouldn't have taken much longer than the timeout configured on the set
987-
auto epsilon = sec(0.01);
987+
auto epsilon = sec(0.05);
988988
ASSERT_LE(QuiesceClock::now() - then - epsilon, last_request->response.sets.at("set2").timeout);
989989

990990
// let's cancel set 1 while awaiting it a few times

0 commit comments

Comments
 (0)