Skip to content

Commit f24c0dc

Browse files
committed
mds/quiesce: always abort fragmenting asynchronously to prevent reentrancy
Fixes: https://tracker.ceph.com/issues/66208 Signed-off-by: Leonid Usov <[email protected]>
1 parent da5c263 commit f24c0dc

File tree

3 files changed

+23
-20
lines changed

3 files changed

+23
-20
lines changed

src/mds/MDCache.cc

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13563,7 +13563,7 @@ void MDCache::clear_dirty_bits_for_stray(CInode* diri) {
1356313563
}
1356413564
}
1356513565

13566-
void MDCache::quiesce_overdrive_fragmenting(CDir* dir, bool async) {
13566+
void MDCache::quiesce_overdrive_fragmenting_async(CDir* dir) {
1356713567
if (!dir || !dir->state_test(CDir::STATE_FRAGMENTING)) {
1356813568
return;
1356913569
}
@@ -13578,21 +13578,22 @@ void MDCache::quiesce_overdrive_fragmenting(CDir* dir, bool async) {
1357813578
dout(20) << __func__ << ": dirfrag " << it->first << " contains my dirfrag " << mydf << dendl;
1357913579
auto const& mdr = it->second.mdr;
1358013580

13581-
if (async) {
13582-
dout(10) << __func__ << ": will schedule async abort_if_freezing for " << *mdr << dendl;
13583-
mds->queue_waiter(new MDSInternalContextWrapper(mds, new LambdaContext( [this, mdr] {
13584-
if (!mdr->dead) {
13585-
dispatch_fragment_dir(mdr, true);
13581+
dout(10) << __func__ << ": will schedule an async abort_if_freezing for mdr " << *mdr << dendl;
13582+
mds->queue_waiter(new MDSInternalContextWrapper(mds, new LambdaContext([this, basefrag=it->first, mdr](){
13583+
if (!mdr->is_live()) {
13584+
dout(20) << "quiesce_overdrive_fragmenting_async: bailing out, mdr " << *mdr << "is dead: " << mdr->dead << "; killed: " << mdr->killed << dendl;
13585+
return;
13586+
}
13587+
if (auto it = fragments.find(basefrag); it != fragments.end() && it->second.mdr == mdr) {
13588+
if (it->second.all_frozen) {
13589+
dout(20) << "quiesce_overdrive_fragmenting_async: too late, won't abort mdr " << *mdr << dendl;
13590+
} else {
13591+
dout(20) << "quiesce_overdrive_fragmenting_async: will abort mdr " << *mdr << dendl;
13592+
mdr->aborted = true;
13593+
dispatch_fragment_dir(mdr);
1358613594
}
13587-
})));
13588-
} else {
13589-
if (mdr->dead) {
13590-
dout(20) << __func__ << ": the request is already dead: " << *mdr << dendl;
13591-
} else {
13592-
dout(10) << __func__ << ": will call abort_if_freezing for " << *mdr << dendl;
13593-
dispatch_fragment_dir(mdr, true);
1359413595
}
13595-
}
13596+
})));
1359613597

1359713598
// there can't be (shouldn't be) more than one containing fragment
1359813599
break;
@@ -13756,8 +13757,9 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr)
1375613757
std::vector<MDRequestRef> todispatch;
1375713758
for (auto& dir : in->get_dirfrags()) {
1375813759
dout(25) << " iterating " << *dir << dendl;
13759-
// overdrive syncrhonously since we aren't yet on the waiting list
13760-
quiesce_overdrive_fragmenting(dir, false);
13760+
// we could be woken up by a finished fragmenting that's now cleaning up
13761+
// and completing the waiter list, so we should attempt the abort asynchronously
13762+
quiesce_overdrive_fragmenting_async(dir);
1376113763
migrator->quiesce_overdrive_export(dir);
1376213764
for (auto& [dnk, dn] : *dir) {
1376313765
dout(25) << " evaluating (" << dnk << ", " << *dn << ")" << dendl;
@@ -13880,9 +13882,9 @@ void MDCache::dispatch_quiesce_path(const MDRequestRef& mdr)
1388013882
CDir* curdir = nullptr;
1388113883
int r = path_traverse(mdr, cf, mdr->get_filepath(), ptflags, nullptr, &rooti, &curdir);
1388213884
if (r > 0) {
13883-
// we must abort asyncrhonously, since we may be on the unfreeze waiter list,
13884-
// which whill be flushed syncrhonously with the abort
13885-
quiesce_overdrive_fragmenting(curdir, true);
13885+
// since we may be on the unfreeze waiter list,
13886+
// we should abort fragmenting asynchronously
13887+
quiesce_overdrive_fragmenting_async(curdir);
1388613888
return;
1388713889
} else if (r < 0) {
1388813890
mds->server->respond_to_request(mdr, r);

src/mds/MDCache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1478,7 +1478,7 @@ class MDCache {
14781478
void finish_uncommitted_fragment(dirfrag_t basedirfrag, int op);
14791479
void rollback_uncommitted_fragment(dirfrag_t basedirfrag, frag_vec_t&& old_frags);
14801480

1481-
void quiesce_overdrive_fragmenting(CDir* dir, bool async);
1481+
void quiesce_overdrive_fragmenting_async(CDir* dir);
14821482
void dispatch_quiesce_path(const MDRequestRef& mdr);
14831483
void dispatch_quiesce_inode(const MDRequestRef& mdr);
14841484

src/mds/Mutation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ struct MDRequestImpl : public MutationImpl {
398398

399399
More* more();
400400
More const* more() const;
401+
// A request is live only while it has been neither killed nor marked dead.
bool is_live() const {
  return !killed && !dead;
}
401402
bool has_more() const;
402403
bool has_witnesses();
403404
bool peer_did_prepare();

0 commit comments

Comments
 (0)