@@ -91,6 +91,19 @@ class QuiesceDbTest: public testing::Test {
9191 submit_condition.notify_all ();
9292 return ++cluster_membership->epoch ;
9393 }
94+ std::atomic<std::optional<bool >> has_work_override;
95+ bool db_thread_has_work () const override {
96+ if (auto has_work = has_work_override.load ()) {
97+ return *has_work;
98+ }
99+ return QuiesceDbManager::db_thread_has_work ();
100+ }
101+
102+ void spurious_submit_wakeup ()
103+ {
104+ std::lock_guard l (submit_mutex);
105+ submit_condition.notify_all ();
106+ }
94107 };
95108
96109 epoch_t epoch = 0 ;
@@ -113,6 +126,16 @@ class QuiesceDbTest: public testing::Test {
113126 return promise.get_future ();
114127 }
115128
129+ using ListingHook = std::function<bool (QuiesceInterface::PeerId, QuiesceDbListing&)>;
130+ std::list<std::pair<ListingHook, std::promise<void >>> listing_hooks;
131+
132+ std::future<void > add_listing_hook (ListingHook&& predicate)
133+ {
134+ std::lock_guard l (comms_mutex);
135+ auto && [_, promise] = listing_hooks.emplace_back (predicate, std::promise<void > {});
136+ return promise.get_future ();
137+ }
138+
116139 void SetUp () override {
117140 for (QuiesceInterface::PeerId r = mds_gid_t (1 ); r < mds_gid_t (11 ); r++) {
118141 managers[r].reset (new TestQuiesceDbManager ());
@@ -153,8 +176,18 @@ class QuiesceDbTest: public testing::Test {
153176 std::unique_lock l (comms_mutex);
154177 if (epoch == this ->epoch ) {
155178 if (this ->managers .contains (recipient)) {
179+ std::queue<std::promise<void >> done_hooks;
156180 dout (10 ) << " listing from " << me << " (leader=" << leader << " ) to " << recipient << " for version " << listing.db_version << " with " << listing.sets .size () << " sets" << dendl;
157181
182+ for (auto it = listing_hooks.begin (); it != listing_hooks.end ();) {
183+ if (it->first (recipient, listing)) {
184+ done_hooks.emplace (std::move (it->second ));
185+ it = listing_hooks.erase (it);
186+ } else {
187+ it++;
188+ }
189+ }
190+
158191 ceph::bufferlist bl;
159192 encode (listing, bl);
160193 listing.clear ();
@@ -163,6 +196,11 @@ class QuiesceDbTest: public testing::Test {
163196
164197 this ->managers [recipient]->submit_peer_listing ({me, std::move (listing)});
165198 comms_cond.notify_all ();
199+ l.unlock ();
200+ while (!done_hooks.empty ()) {
201+ done_hooks.front ().set_value ();
202+ done_hooks.pop ();
203+ }
166204 return 0 ;
167205 }
168206 }
@@ -1346,6 +1384,34 @@ TEST_F(QuiesceDbTest, LeaderShutdown)
13461384 }
13471385}
13481386
1387+ /* ================================================================ */
1388+ TEST_F (QuiesceDbTest, MultiRankBootstrap)
1389+ {
1390+ // create a cluster with a peer that doesn't process messages
1391+ managers.at (mds_gid_t (2 ))->has_work_override = false ;
1392+ ASSERT_NO_FATAL_FAILURE (configure_cluster ({ mds_gid_t (1 ), mds_gid_t (2 ) }));
1393+
1394+ const QuiesceTimeInterval PEER_DISCOVERY_INTERVAL = std::chrono::milliseconds (1100 );
1395+
1396+ // we should be now in the bootstrap loop,
1397+ // which should send discoveries to silent peers
1398+ // once in PEER_DISCOVERY_INTERVAL
1399+ for (int i = 0 ; i < 5 ; i++) {
1400+
1401+ if (i > 2 ) {
1402+ // through a wrench by disrupting the wait sleep in the bootstrap flow
1403+ managers.at (mds_gid_t (1 ))->spurious_submit_wakeup ();
1404+ }
1405+
1406+ // wait for the next peer discovery request
1407+ auto saw_discovery = add_listing_hook ([](auto recipient, auto const & listing) {
1408+ return recipient == mds_gid_t (2 ) && listing.db_version .set_version == 0 ;
1409+ });
1410+
1411+ EXPECT_EQ (std::future_status::ready, saw_discovery.wait_for (PEER_DISCOVERY_INTERVAL + std::chrono::milliseconds (100 )));
1412+ }
1413+ }
1414+
13491415/* ================================================================ */
13501416TEST_F (QuiesceDbTest, MultiRankQuiesce)
13511417{
0 commit comments