Skip to content

Commit 41406dc

Browse files
committed
mds/MDBalancer: ignore queued callbacks if MDS is not active
Signed-off-by: Leonid Usov <[email protected]>
Fixes: https://tracker.ceph.com/issues/63281
1 parent 241e46a commit 41406dc

File tree

1 file changed

+24
-2
lines changed

1 file changed

+24
-2
lines changed

src/mds/MDBalancer.cc

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -565,7 +565,8 @@ double MDBalancer::try_match(balance_state_t& state, mds_rank_t ex, double& maxe
565565

566566
void MDBalancer::queue_split(const CDir *dir, bool fast)
567567
{
568-
dout(10) << __func__ << " enqueuing " << *dir
568+
constexpr const auto &_func_ = __func__;
569+
dout(10) << _func_ << " enqueuing " << *dir
569570
<< " (fast=" << fast << ")" << dendl;
570571

571572
const dirfrag_t df = dir->dirfrag();
@@ -579,6 +580,16 @@ void MDBalancer::queue_split(const CDir *dir, bool fast)
579580
return;
580581
}
581582

583+
if (mds->is_stopping()) {
584+
// not a good time. This could have been (!mds->is_active())
585+
// or at least (mds->is_stopping() || mds->is_stopped()), but
586+
// is_stopped() is never true because an MDS respawns as soon as it's removed from the map;
587+
// the narrow is_stopping check is to avoid potential regressions
588+
// due to unknown coupling with other parts of the MDS (especially multiple ranks).
589+
dout(5) << "ignoring the " << _func_ << " callback because the MDS state is '" << ceph_mds_state_name(mds->get_state()) << "'" << dendl;
590+
return;
591+
}
592+
582593
auto mdcache = mds->mdcache;
583594

584595
CDir *dir = mdcache->get_dirfrag(df);
@@ -593,7 +604,7 @@ void MDBalancer::queue_split(const CDir *dir, bool fast)
593604

594605
// Pass on to MDCache: note that the split might still not
595606
// happen if the checks in MDCache::can_fragment fail.
596-
dout(10) << __func__ << " splitting " << *dir << dendl;
607+
dout(10) << _func_ << " splitting " << *dir << dendl;
597608
int bits = g_conf()->mds_bal_split_bits;
598609
if (dir->inode->is_ephemeral_dist()) {
599610
unsigned min_frag_bits = mdcache->get_ephemeral_dist_frag_bits();
@@ -623,6 +634,7 @@ void MDBalancer::queue_split(const CDir *dir, bool fast)
623634
void MDBalancer::queue_merge(CDir *dir)
624635
{
625636
const auto frag = dir->dirfrag();
637+
constexpr const auto &_func_ = __func__;
626638
auto callback = [this, frag](int r) {
627639
ceph_assert(frag.frag != frag_t());
628640

@@ -631,6 +643,16 @@ void MDBalancer::queue_merge(CDir *dir)
631643
// starting one), and this context is the only one that erases it.
632644
merge_pending.erase(frag);
633645

646+
if (mds->is_stopping()) {
647+
// not a good time. This could have been (!mds->is_active())
648+
// or at least (mds->is_stopping() || mds->is_stopped()), but
649+
// is_stopped() is never true because an MDS respawns as soon as it's removed from the map;
650+
// the narrow is_stopping check is to avoid potential regressions
651+
// due to unknown coupling with other parts of the MDS (especially multiple ranks).
652+
dout(5) << "ignoring the " << _func_ << " callback because the MDS state is '" << ceph_mds_state_name(mds->get_state()) << "'" << dendl;
653+
return;
654+
}
655+
634656
auto mdcache = mds->mdcache;
635657
CDir *dir = mdcache->get_dirfrag(frag);
636658
if (!dir) {

0 commit comments

Comments
 (0)