1010 *
1111 */
1212#include " common/Cond.h"
13+ #include " common/debug.h"
1314#include " mds/QuiesceAgent.h"
1415#include " gtest/gtest.h"
1516#include < algorithm>
1617#include < functional>
18+ #include < future>
1719#include < queue>
1820#include < ranges>
1921#include < system_error>
2022#include < thread>
21- #include < future>
23+
24+ #define dout_context g_ceph_context
25+ #define dout_subsys ceph_subsys_mds_quiesce
26+ #undef dout_prefix
27+ #define dout_prefix *_dout << " == test == "
2228
2329class QuiesceAgentTest : public testing ::Test {
2430 using RequestHandle = QuiesceInterface::RequestHandle;
@@ -70,8 +76,24 @@ class QuiesceAgentTest : public testing::Test {
7076 (*f)(pending, current);
7177 }
7278 }
79+
80+ bool wait_for_agent_in_set_roots = false ;
81+ void set_pending_roots (QuiesceDbVersion db_version, TrackedRoots&& new_roots) override {
82+ // just like the original version,
83+ // but allows to simulate a case when the context
84+ // switches to the agent thread and processes the new version
85+ // before the calling has the chance to continue
86+
87+ QuiesceAgent::set_pending_roots (db_version, std::move (new_roots));
88+
89+ while (wait_for_agent_in_set_roots && db_version != await_idle ()) {
90+ dout (3 ) << __func__ << " : awaiting agent on version " << db_version << dendl;
91+ }
92+ }
93+
94+ ControlInterface& get_control_interface () { return quiesce_control; }
7395 };
74- QuiesceMap latest_ack ;
96+ QuiesceMap async_ack ;
7597 std::unordered_map<QuiesceRoot, QuiescingRoot> quiesce_requests;
7698 ceph_tid_t last_tid;
7799 std::mutex mutex;
@@ -134,7 +156,7 @@ class QuiesceAgentTest : public testing::Test {
134156
135157 ci.agent_ack = [this ](QuiesceMap const & update) {
136158 std::lock_guard l (mutex);
137- latest_ack = update;
159+ async_ack = update;
138160 return 0 ;
139161 };
140162
@@ -157,26 +179,32 @@ class QuiesceAgentTest : public testing::Test {
157179
158180 using R = QuiesceMap::Roots::value_type;
159181 using RootInitList = std::initializer_list<R>;
182+ enum struct WaitForAgent { IfAsync, No };
160183
161- std::optional<QuiesceMap> update (QuiesceDbVersion v, RootInitList roots)
184+ std::optional<QuiesceMap> update (QuiesceDbVersion v, RootInitList roots, WaitForAgent wait = WaitForAgent::IfAsync )
162185 {
163186 QuiesceMap map (v, QuiesceMap::Roots { roots });
164187
165188 if (agent->db_update (map)) {
166189 return map;
167190 }
168191
169- return std::nullopt ;
192+ if (WaitForAgent::No == wait) {
193+ return std::nullopt ;
194+ } else {
195+ assert (await_idle_v (v.set_version ));
196+ return async_ack;
197+ }
170198 }
171199
172- std::optional<QuiesceMap> update (QuiesceSetVersion v, RootInitList roots)
200+ std::optional<QuiesceMap> update (QuiesceSetVersion v, RootInitList roots, WaitForAgent wait = WaitForAgent::IfAsync )
173201 {
174- return update (QuiesceDbVersion { 1 , v }, roots);
202+ return update (QuiesceDbVersion { 1 , v }, roots, wait );
175203 }
176204
177- std::optional<QuiesceMap> update (RootInitList roots)
205+ std::optional<QuiesceMap> update (RootInitList roots, WaitForAgent wait = WaitForAgent::IfAsync )
178206 {
179- return update (agent->get_latest_version () + 1 , roots);
207+ return update ({ 1 , agent->get_latest_version (). set_version + 1 } , roots, wait );
180208 }
181209
182210 template <class _Rep = std::chrono::seconds::rep, class _Period = std::chrono::seconds::period, typename D = std::chrono::duration<_Rep, _Period>>
@@ -338,22 +366,22 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) {
338366
339367 EXPECT_TRUE (await_idle ());
340368 // we should have seen an ack sent
341- EXPECT_EQ (1 , latest_ack .db_version );
342- EXPECT_EQ (1 , latest_ack .roots .size ());
343- EXPECT_EQ (QS_QUIESCED, latest_ack .roots .at (" root1" ).state );
369+ EXPECT_EQ (1 , async_ack .db_version );
370+ EXPECT_EQ (1 , async_ack .roots .size ());
371+ EXPECT_EQ (QS_QUIESCED, async_ack .roots .at (" root1" ).state );
344372
345- latest_ack .clear ();
373+ async_ack .clear ();
346374
347375 // complete the other root with failure
348376 EXPECT_TRUE (complete_quiesce (" root2" , -1 ));
349377
350378 EXPECT_TRUE (await_idle ());
351- EXPECT_EQ (1 , latest_ack .db_version );
352- ASSERT_EQ (2 , latest_ack .roots .size ());
353- EXPECT_EQ (QS_QUIESCED, latest_ack .roots .at (" root1" ).state );
354- EXPECT_EQ (QS_FAILED, latest_ack .roots .at (" root2" ).state );
379+ EXPECT_EQ (1 , async_ack .db_version );
380+ ASSERT_EQ (2 , async_ack .roots .size ());
381+ EXPECT_EQ (QS_QUIESCED, async_ack .roots .at (" root1" ).state );
382+ EXPECT_EQ (QS_FAILED, async_ack .roots .at (" root2" ).state );
355383
356- latest_ack .clear ();
384+ async_ack .clear ();
357385
358386 // complete the third root with success
359387 // complete one root with success
@@ -362,11 +390,11 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) {
362390 EXPECT_TRUE (await_idle ());
363391
364392 // we should see the two quiesced roots in the ack,
365- EXPECT_EQ (1 , latest_ack .db_version );
366- ASSERT_EQ (3 , latest_ack .roots .size ());
367- EXPECT_EQ (QS_QUIESCED, latest_ack .roots .at (" root1" ).state );
368- EXPECT_EQ (QS_FAILED, latest_ack .roots .at (" root2" ).state );
369- EXPECT_EQ (QS_QUIESCED, latest_ack .roots .at (" root3" ).state );
393+ EXPECT_EQ (1 , async_ack .db_version );
394+ ASSERT_EQ (3 , async_ack .roots .size ());
395+ EXPECT_EQ (QS_QUIESCED, async_ack .roots .at (" root1" ).state );
396+ EXPECT_EQ (QS_FAILED, async_ack .roots .at (" root2" ).state );
397+ EXPECT_EQ (QS_QUIESCED, async_ack .roots .at (" root3" ).state );
370398
371399 {
372400 auto ack = update (2 , {
@@ -380,7 +408,7 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) {
380408 // root2 is still quiescing, no updates for it
381409 // root3 is released asyncrhonously so for now it should be QUIESCED
382410 ASSERT_EQ (2 , ack->roots .size ());
383- EXPECT_EQ (QS_FAILED, latest_ack .roots .at (" root2" ).state );
411+ EXPECT_EQ (QS_FAILED, async_ack .roots .at (" root2" ).state );
384412 EXPECT_EQ (QS_QUIESCED, ack->roots .at (" root3" ).state );
385413 }
386414
@@ -466,7 +494,7 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) {
466494 EXPECT_TRUE (quiesce_requests.contains (" root1" ));
467495 EXPECT_TRUE (quiesce_requests.contains (" root2" ));
468496
469- latest_ack .clear ();
497+ async_ack .clear ();
470498 // now, bring the roots back
471499 {
472500 auto ack = update (3 , {
@@ -486,10 +514,10 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) {
486514
487515 // root1 and root2 are still registered internally
488516 // so it should result in a failure to quiesce them again
489- EXPECT_EQ (3 , latest_ack .db_version );
490- EXPECT_EQ (2 , latest_ack .roots .size ());
491- EXPECT_EQ (QS_FAILED, latest_ack .roots .at (" root1" ).state );
492- EXPECT_EQ (QS_FAILED, latest_ack .roots .at (" root2" ).state );
517+ EXPECT_EQ (3 , async_ack .db_version );
518+ EXPECT_EQ (2 , async_ack .roots .size ());
519+ EXPECT_EQ (QS_FAILED, async_ack .roots .at (" root1" ).state );
520+ EXPECT_EQ (QS_FAILED, async_ack .roots .at (" root2" ).state );
493521
494522 // the actual state of the pinned objects shouldn't have changed
495523 EXPECT_EQ (QS_QUIESCED, pinned1->get_actual_state ());
@@ -508,11 +536,11 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) {
508536 EXPECT_TRUE (complete_quiesce (" root3" ));
509537
510538 EXPECT_TRUE (await_idle ());
511- EXPECT_EQ (3 , latest_ack .db_version );
512- EXPECT_EQ (3 , latest_ack .roots .size ());
513- EXPECT_EQ (QS_FAILED, latest_ack .roots .at (" root1" ).state );
514- EXPECT_EQ (QS_FAILED, latest_ack .roots .at (" root2" ).state );
515- EXPECT_EQ (QS_QUIESCED, latest_ack .roots .at (" root3" ).state );
539+ EXPECT_EQ (3 , async_ack .db_version );
540+ EXPECT_EQ (3 , async_ack .roots .size ());
541+ EXPECT_EQ (QS_FAILED, async_ack .roots .at (" root1" ).state );
542+ EXPECT_EQ (QS_FAILED, async_ack .roots .at (" root2" ).state );
543+ EXPECT_EQ (QS_QUIESCED, async_ack .roots .at (" root3" ).state );
516544}
517545
518546TEST_F (QuiesceAgentTest, TimeoutBeforeComplete)
@@ -594,12 +622,40 @@ TEST_F(QuiesceAgentTest, RapidDbUpdates)
594622 // nothing should be in the ack
595623 // if we incorrectly submit root1 twice
596624 // then it should be repored here as FAILED
597- EXPECT_EQ (2 , latest_ack .db_version );
598- EXPECT_EQ (0 , latest_ack .roots .size ());
625+ EXPECT_EQ (2 , async_ack .db_version );
626+ EXPECT_EQ (0 , async_ack .roots .size ());
599627
600628 {
601629 auto tracked = agent->tracked_roots ();
602630 EXPECT_EQ (2 , tracked.size ());
603631 }
604632}
605633
634+ TEST_F (QuiesceAgentTest, RapidAsyncAck)
635+ {
636+ // This validates that if the agent thread manages to
637+ // process a db update and generate a QUIESCED ack
638+ // before the updating thread gets the CPU to progress,
639+ // then the outdated synchronous ack is not sent
640+
641+ agent->wait_for_agent_in_set_roots = true ;
642+
643+ // make the agent complete the request synchronosuly with the submit
644+ auto && old_submit = agent->get_control_interface ().submit_request ;
645+ agent->get_control_interface ().submit_request = [this , old_submit = std::move (old_submit)](QuiesceRoot root, Context* ctx) {
646+ auto result = old_submit (root, ctx);
647+ dout (10 ) << " quiescing the root `" << root << " ` in submit" << dendl;
648+ complete_quiesce (root, 0 );
649+ return result;
650+ };
651+
652+ auto ack = update (1 , {
653+ { " root1" , QS_QUIESCING },
654+ });
655+
656+ auto && latest_ack = ack.value_or (async_ack);
657+
658+ EXPECT_EQ (1 , latest_ack.db_version );
659+ ASSERT_EQ (1 , latest_ack.roots .size ());
660+ EXPECT_EQ (QS_QUIESCED, latest_ack.roots .at (" root1" ).state );
661+ }
0 commit comments