Skip to content

Commit 4ab40ea

Browse files
committed
mds/quiesce-agt: add test for a rapid async ack
In this scenario, the agent thread is able to run and generate an ack before the db_update call returns to the caller. Fixes: https://tracker.ceph.com/issues/66219 Signed-off-by: Leonid Usov <[email protected]>
1 parent f24c0dc commit 4ab40ea

File tree

2 files changed

+93
-37
lines changed

2 files changed

+93
-37
lines changed

src/mds/QuiesceAgent.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ class QuiesceAgent {
237237
return std::max(current.db_version, pending.db_version);
238238
}
239239

240-
void set_pending_roots(QuiesceDbVersion db_version, TrackedRoots&& new_roots);
240+
virtual void set_pending_roots(QuiesceDbVersion db_version, TrackedRoots&& new_roots);
241241

242242
void set_upkeep_needed();
243243

src/test/mds/TestQuiesceAgent.cc

Lines changed: 92 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,21 @@
1010
*
1111
*/
1212
#include "common/Cond.h"
13+
#include "common/debug.h"
1314
#include "mds/QuiesceAgent.h"
1415
#include "gtest/gtest.h"
1516
#include <algorithm>
1617
#include <functional>
18+
#include <future>
1719
#include <queue>
1820
#include <ranges>
1921
#include <system_error>
2022
#include <thread>
21-
#include <future>
23+
24+
#define dout_context g_ceph_context
25+
#define dout_subsys ceph_subsys_mds_quiesce
26+
#undef dout_prefix
27+
#define dout_prefix *_dout << "== test == "
2228

2329
class QuiesceAgentTest : public testing::Test {
2430
using RequestHandle = QuiesceInterface::RequestHandle;
@@ -70,8 +76,24 @@ class QuiesceAgentTest : public testing::Test {
7076
(*f)(pending, current);
7177
}
7278
}
79+
80+
bool wait_for_agent_in_set_roots = false;
81+
void set_pending_roots(QuiesceDbVersion db_version, TrackedRoots&& new_roots) override {
82+
// just like the original version,
83+
// but allows to simulate a case when the context
84+
// switches to the agent thread and processes the new version
85+
// before the calling has the chance to continue
86+
87+
QuiesceAgent::set_pending_roots(db_version, std::move(new_roots));
88+
89+
while(wait_for_agent_in_set_roots && db_version != await_idle()) {
90+
dout(3) << __func__ << ": awaiting agent on version " << db_version << dendl;
91+
}
92+
}
93+
94+
ControlInterface& get_control_interface() { return quiesce_control; }
7395
};
74-
QuiesceMap latest_ack;
96+
QuiesceMap async_ack;
7597
std::unordered_map<QuiesceRoot, QuiescingRoot> quiesce_requests;
7698
ceph_tid_t last_tid;
7799
std::mutex mutex;
@@ -134,7 +156,7 @@ class QuiesceAgentTest : public testing::Test {
134156

135157
ci.agent_ack = [this](QuiesceMap const& update) {
136158
std::lock_guard l(mutex);
137-
latest_ack = update;
159+
async_ack = update;
138160
return 0;
139161
};
140162

@@ -157,26 +179,32 @@ class QuiesceAgentTest : public testing::Test {
157179

158180
using R = QuiesceMap::Roots::value_type;
159181
using RootInitList = std::initializer_list<R>;
182+
enum struct WaitForAgent { IfAsync, No };
160183

161-
std::optional<QuiesceMap> update(QuiesceDbVersion v, RootInitList roots)
184+
std::optional<QuiesceMap> update(QuiesceDbVersion v, RootInitList roots, WaitForAgent wait = WaitForAgent::IfAsync)
162185
{
163186
QuiesceMap map(v, QuiesceMap::Roots { roots });
164187

165188
if (agent->db_update(map)) {
166189
return map;
167190
}
168191

169-
return std::nullopt;
192+
if (WaitForAgent::No == wait) {
193+
return std::nullopt;
194+
} else {
195+
assert(await_idle_v(v.set_version));
196+
return async_ack;
197+
}
170198
}
171199

172-
std::optional<QuiesceMap> update(QuiesceSetVersion v, RootInitList roots)
200+
std::optional<QuiesceMap> update(QuiesceSetVersion v, RootInitList roots, WaitForAgent wait = WaitForAgent::IfAsync)
173201
{
174-
return update(QuiesceDbVersion { 1, v }, roots);
202+
return update(QuiesceDbVersion { 1, v }, roots, wait);
175203
}
176204

177-
std::optional<QuiesceMap> update(RootInitList roots)
205+
std::optional<QuiesceMap> update(RootInitList roots, WaitForAgent wait = WaitForAgent::IfAsync)
178206
{
179-
return update(agent->get_latest_version() + 1, roots);
207+
return update({1, agent->get_latest_version().set_version + 1}, roots, wait);
180208
}
181209

182210
template <class _Rep = std::chrono::seconds::rep, class _Period = std::chrono::seconds::period, typename D = std::chrono::duration<_Rep, _Period>>
@@ -338,22 +366,22 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) {
338366

339367
EXPECT_TRUE(await_idle());
340368
// we should have seen an ack sent
341-
EXPECT_EQ(1, latest_ack.db_version);
342-
EXPECT_EQ(1, latest_ack.roots.size());
343-
EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state);
369+
EXPECT_EQ(1, async_ack.db_version);
370+
EXPECT_EQ(1, async_ack.roots.size());
371+
EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root1").state);
344372

345-
latest_ack.clear();
373+
async_ack.clear();
346374

347375
// complete the other root with failure
348376
EXPECT_TRUE(complete_quiesce("root2", -1));
349377

350378
EXPECT_TRUE(await_idle());
351-
EXPECT_EQ(1, latest_ack.db_version);
352-
ASSERT_EQ(2, latest_ack.roots.size());
353-
EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state);
354-
EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state);
379+
EXPECT_EQ(1, async_ack.db_version);
380+
ASSERT_EQ(2, async_ack.roots.size());
381+
EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root1").state);
382+
EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state);
355383

356-
latest_ack.clear();
384+
async_ack.clear();
357385

358386
// complete the third root with success
359387
// complete one root with success
@@ -362,11 +390,11 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) {
362390
EXPECT_TRUE(await_idle());
363391

364392
// we should see the two quiesced roots in the ack,
365-
EXPECT_EQ(1, latest_ack.db_version);
366-
ASSERT_EQ(3, latest_ack.roots.size());
367-
EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state);
368-
EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state);
369-
EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root3").state);
393+
EXPECT_EQ(1, async_ack.db_version);
394+
ASSERT_EQ(3, async_ack.roots.size());
395+
EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root1").state);
396+
EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state);
397+
EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root3").state);
370398

371399
{
372400
auto ack = update(2, {
@@ -380,7 +408,7 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) {
380408
// root2 is still quiescing, no updates for it
381409
// root3 is released asyncrhonously so for now it should be QUIESCED
382410
ASSERT_EQ(2, ack->roots.size());
383-
EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state);
411+
EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state);
384412
EXPECT_EQ(QS_QUIESCED, ack->roots.at("root3").state);
385413
}
386414

@@ -466,7 +494,7 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) {
466494
EXPECT_TRUE(quiesce_requests.contains("root1"));
467495
EXPECT_TRUE(quiesce_requests.contains("root2"));
468496

469-
latest_ack.clear();
497+
async_ack.clear();
470498
// now, bring the roots back
471499
{
472500
auto ack = update(3, {
@@ -486,10 +514,10 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) {
486514

487515
// root1 and root2 are still registered internally
488516
// so it should result in a failure to quiesce them again
489-
EXPECT_EQ(3, latest_ack.db_version);
490-
EXPECT_EQ(2, latest_ack.roots.size());
491-
EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root1").state);
492-
EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state);
517+
EXPECT_EQ(3, async_ack.db_version);
518+
EXPECT_EQ(2, async_ack.roots.size());
519+
EXPECT_EQ(QS_FAILED, async_ack.roots.at("root1").state);
520+
EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state);
493521

494522
// the actual state of the pinned objects shouldn't have changed
495523
EXPECT_EQ(QS_QUIESCED, pinned1->get_actual_state());
@@ -508,11 +536,11 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) {
508536
EXPECT_TRUE(complete_quiesce("root3"));
509537

510538
EXPECT_TRUE(await_idle());
511-
EXPECT_EQ(3, latest_ack.db_version);
512-
EXPECT_EQ(3, latest_ack.roots.size());
513-
EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root1").state);
514-
EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state);
515-
EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root3").state);
539+
EXPECT_EQ(3, async_ack.db_version);
540+
EXPECT_EQ(3, async_ack.roots.size());
541+
EXPECT_EQ(QS_FAILED, async_ack.roots.at("root1").state);
542+
EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state);
543+
EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root3").state);
516544
}
517545

518546
TEST_F(QuiesceAgentTest, TimeoutBeforeComplete)
@@ -594,12 +622,40 @@ TEST_F(QuiesceAgentTest, RapidDbUpdates)
594622
// nothing should be in the ack
595623
// if we incorrectly submit root1 twice
596624
// then it should be repored here as FAILED
597-
EXPECT_EQ(2, latest_ack.db_version);
598-
EXPECT_EQ(0, latest_ack.roots.size());
625+
EXPECT_EQ(2, async_ack.db_version);
626+
EXPECT_EQ(0, async_ack.roots.size());
599627

600628
{
601629
auto tracked = agent->tracked_roots();
602630
EXPECT_EQ(2, tracked.size());
603631
}
604632
}
605633

634+
TEST_F(QuiesceAgentTest, RapidAsyncAck)
635+
{
636+
// This validates that if the agent thread manages to
637+
// process a db update and generate a QUIESCED ack
638+
// before the updating thread gets the CPU to progress,
639+
// then the outdated synchronous ack is not sent
640+
641+
agent->wait_for_agent_in_set_roots = true;
642+
643+
// make the agent complete the request synchronosuly with the submit
644+
auto && old_submit = agent->get_control_interface().submit_request;
645+
agent->get_control_interface().submit_request = [this, old_submit = std::move(old_submit)](QuiesceRoot root, Context* ctx) {
646+
auto result = old_submit(root, ctx);
647+
dout(10) << "quiescing the root `" << root << "` in submit" << dendl;
648+
complete_quiesce(root, 0);
649+
return result;
650+
};
651+
652+
auto ack = update(1, {
653+
{ "root1", QS_QUIESCING },
654+
});
655+
656+
auto && latest_ack = ack.value_or(async_ack);
657+
658+
EXPECT_EQ(1, latest_ack.db_version);
659+
ASSERT_EQ(1, latest_ack.roots.size());
660+
EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state);
661+
}

0 commit comments

Comments
 (0)