Skip to content

Commit 6b42c07

Browse files
authored
Merge pull request ceph#60684 from salieri11/wip-igolikov-asok-56442
mds: add MDS asok command for dumping stray directories
2 parents 4a25f99 + f858fa9 commit 6b42c07

File tree

8 files changed

+156
-17
lines changed

8 files changed

+156
-17
lines changed

doc/cephfs/scrub.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,3 +162,30 @@ Evaluate strays using recursive scrub
162162
at root, run scrub with flags ``scrub_mdsdir`` and ``recursive``::
163163

164164
ceph tell mds.<fsname>:0 scrub start / recursive,scrub_mdsdir
165+
166+
Dump stray folder content
167+
=====================================
168+
169+
- In order to dump stray folder content on a specific MDS, use the following command::
170+
171+
ceph tell mds.<fsname>:0 dump stray
172+
{
173+
"strays": [
174+
{
175+
"ino": "0x100000001f7",
176+
"stray_prior_path": "/dir/dir1",
177+
"client_caps": [
178+
{
179+
"client_id": 4156,
180+
"pending": "pAsLsXsFscr",
181+
"issued": "pAsLsXsFscr",
182+
"wanted": "-",
183+
"last_sent": 3
184+
}
185+
],
186+
"loner": -1,
187+
"want_loner": -1,
188+
"mds_caps_wanted": [],
189+
"is_subvolume": false
190+
}
191+
]}

qa/tasks/cephfs/test_strays.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,3 +1037,32 @@ def test_replicated_delete_speed(self):
10371037

10381038
duration = (end - begin).total_seconds()
10391039
self.assertLess(duration, (file_count * tick_period) * 0.25)
1040+
1041+
def test_asok_dump_stray_command(self):
1042+
"""
1043+
Test MDS asok dump stray command
1044+
"""
1045+
1046+
LOW_LIMIT = 50
1047+
# need to create more folders to force fragmentation, creating more than needed
1048+
# to be on the safe side.
1049+
# we want to test the case when dumping stray folder must wait for the next dirfrag to be fetched
1050+
NUM_DIRS = LOW_LIMIT * 20
1051+
TOP_DIR = "topdir"
1052+
self.config_set("mds", "mds_bal_split_size", str(LOW_LIMIT))
1053+
self.assertEqual(self.config_get("mds", "mds_bal_split_size"), str(LOW_LIMIT), "LOW_LIMIT was not set on mds!")
1054+
1055+
# create a 2-level tree with enough folders to force the stray folder to be fragmented
1056+
# total of NUM_DIRS subdirs will be created
1057+
self.mount_a.run_shell(f"mkdir -p {TOP_DIR}/subdir{{1..{NUM_DIRS}}}")
1058+
# create snapshot
1059+
self.mount_a.run_shell(f"mkdir {TOP_DIR}/.snap/snap1")
1060+
1061+
# delete 2nd level dirs to generate strays
1062+
# don't wait, we want to dump stray dir while delete runs, to make it more interesting
1063+
self.mount_a.run_shell(f"rm -rf {TOP_DIR}/*", wait=False)
1064+
1065+
# wait for all deleted folders to become strays
1066+
self.wait_until_equal(
1067+
lambda: len(self.fs.rank_tell(["dump", "stray"])),
1068+
expect_val=NUM_DIRS, timeout=60, period=1)

src/mds/MDCache.cc

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10374,13 +10374,21 @@ void MDCache::notify_global_snaprealm_update(int snap_op)
1037410374

1037510375
struct C_MDC_RetryScanStray : public MDCacheContext {
1037610376
dirfrag_t next;
10377-
C_MDC_RetryScanStray(MDCache *c, dirfrag_t n) : MDCacheContext(c), next(n) { }
10377+
std::unique_ptr<MDCache::C_MDS_DumpStrayDirCtx> cmd_ctx;
10378+
C_MDC_RetryScanStray(MDCache *c, dirfrag_t n, std::unique_ptr<MDCache::C_MDS_DumpStrayDirCtx> ctx) :
10379+
MDCacheContext(c), next(n), cmd_ctx(std::move(ctx)) {}
1037810380
void finish(int r) override {
10379-
mdcache->scan_stray_dir(next);
10381+
mdcache->scan_stray_dir(next, std::move(cmd_ctx));
1038010382
}
1038110383
};
1038210384

10383-
void MDCache::scan_stray_dir(dirfrag_t next)
10385+
/*
10386+
* If the cmd_ctx is not nullptr, the caller is asok command handler,
10387+
* which will block until on_finish is called.
10388+
* The cmd_ctx holds the formatter to dump stray dir content while scanning.
10389+
* The function returns -EAGAIN when the scan has to wait for a dirfrag (one that cannot be auth-pinned or is not yet complete); the scan is then resumed asynchronously.
10390+
*/
10391+
int MDCache::scan_stray_dir(dirfrag_t next, std::unique_ptr<MDCache::C_MDS_DumpStrayDirCtx> cmd_ctx)
1038410392
{
1038510393
dout(10) << "scan_stray_dir " << next << dendl;
1038610394

@@ -10399,13 +10407,13 @@ void MDCache::scan_stray_dir(dirfrag_t next)
1039910407
continue;
1040010408

1040110409
if (!dir->can_auth_pin()) {
10402-
dir->add_waiter(CDir::WAIT_UNFREEZE, new C_MDC_RetryScanStray(this, dir->dirfrag()));
10403-
return;
10410+
dir->add_waiter(CDir::WAIT_UNFREEZE, new C_MDC_RetryScanStray(this, dir->dirfrag(), std::move(cmd_ctx)));
10411+
return -EAGAIN;
1040410412
}
1040510413

1040610414
if (!dir->is_complete()) {
10407-
dir->fetch(new C_MDC_RetryScanStray(this, dir->dirfrag()));
10408-
return;
10415+
dir->fetch(new C_MDC_RetryScanStray(this, dir->dirfrag(), std::move(cmd_ctx)));
10416+
return -EAGAIN;
1040910417
}
1041010418

1041110419
for (auto &p : dir->items) {
@@ -10414,14 +10422,32 @@ void MDCache::scan_stray_dir(dirfrag_t next)
1041410422
CDentry::linkage_t *dnl = dn->get_projected_linkage();
1041510423
if (dnl->is_primary()) {
1041610424
CInode *in = dnl->get_inode();
10425+
// only if we came from asok cmd handler
10426+
if (cmd_ctx) {
10427+
cmd_ctx->begin_dump();
10428+
cmd_ctx->get_formatter()->open_object_section("stray_inode");
10429+
cmd_ctx->get_formatter()->dump_int("ino: ", in->ino());
10430+
cmd_ctx->get_formatter()->dump_string("stray_prior_path: ", in->get_inode()->stray_prior_path);
10431+
in->dump(cmd_ctx->get_formatter(), CInode::DUMP_CAPS);
10432+
cmd_ctx->get_formatter()->close_section();
10433+
}
1041710434
if (in->get_inode()->nlink == 0)
1041810435
in->state_set(CInode::STATE_ORPHAN);
10419-
maybe_eval_stray(in);
10436+
// no need to evaluate stray when dumping the dir content
10437+
if (!cmd_ctx) {
10438+
maybe_eval_stray(in);
10439+
}
1042010440
}
1042110441
}
1042210442
}
1042310443
next.frag = frag_t();
1042410444
}
10445+
// only if we came from asok cmd handler
10446+
if (cmd_ctx) {
10447+
cmd_ctx->end_dump();
10448+
cmd_ctx->finish(0);
10449+
}
10450+
return 0;
1042510451
}
1042610452

1042710453
void MDCache::fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin)
@@ -10432,9 +10458,10 @@ void MDCache::fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Conte
1043210458
mds->logger->inc(l_mds_openino_backtrace_fetch);
1043310459
}
1043410460

10435-
10436-
10437-
10461+
int MDCache::stray_status(std::unique_ptr<C_MDS_DumpStrayDirCtx> ctx)
10462+
{
10463+
return scan_stray_dir(dirfrag_t(), std::move(ctx));
10464+
}
1043810465

1043910466
// ========================================================================================
1044010467
// DISCOVER

src/mds/MDCache.h

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,40 @@ class MDCache {
642642
bool ap_freeze = false;
643643
};
644644

645+
/**
646+
* Helper wrapper, provides both context object with finish function
647+
* and placeholder for formatter. Better alternative to passing formatter as another argument
648+
* to the MDCache function
649+
*/
650+
class C_MDS_DumpStrayDirCtx : public MDSInternalContext {
651+
public:
652+
void finish(int r) override {
653+
ceph_assert(on_finish);
654+
MDSContext::finish(r);
655+
on_finish(r);
656+
}
657+
Formatter* get_formatter() const {
658+
ceph_assert(dump_formatter);
659+
return dump_formatter;
660+
}
661+
void begin_dump() {
662+
if(!started) {
663+
started = true;
664+
get_formatter()->open_array_section("strays");
665+
}
666+
}
667+
void end_dump() {
668+
get_formatter()->close_section();
669+
}
670+
C_MDS_DumpStrayDirCtx(MDCache *c, Formatter* f, std::function<void(int)>&& ext_on_finish) :
671+
MDSInternalContext(c->mds), cache(c), dump_formatter(f), on_finish(std::move(ext_on_finish)) {}
672+
private:
673+
MDCache *cache;
674+
Formatter* dump_formatter;
675+
std::function<void(int)> on_finish;
676+
bool started = false;
677+
};
678+
645679
MDRequestRef lock_path(LockPathConfig config, std::function<void(MDRequestRef const& mdr)> on_locked = {});
646680

647681
void clean_open_file_lists();
@@ -1073,6 +1107,7 @@ class MDCache {
10731107
void dump_tree(CInode *in, const int cur_depth, const int max_depth, Formatter *f);
10741108

10751109
void cache_status(Formatter *f);
1110+
int stray_status(std::unique_ptr<C_MDS_DumpStrayDirCtx> ctx);
10761111

10771112
void dump_resolve_status(Formatter *f) const;
10781113
void dump_rejoin_status(Formatter *f) const;
@@ -1283,7 +1318,7 @@ class MDCache {
12831318
void handle_open_ino(const cref_t<MMDSOpenIno> &m, int err=0);
12841319
void handle_open_ino_reply(const cref_t<MMDSOpenInoReply> &m);
12851320

1286-
void scan_stray_dir(dirfrag_t next=dirfrag_t());
1321+
int scan_stray_dir(dirfrag_t next=dirfrag_t(), std::unique_ptr<C_MDS_DumpStrayDirCtx> ctx = nullptr);
12871322
// -- replicas --
12881323
void handle_discover(const cref_t<MDiscover> &dis);
12891324
void handle_discover_reply(const cref_t<MDiscoverReply> &m);

src/mds/MDSContext.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,12 @@
2121
#define dout_context g_ceph_context
2222
#define dout_subsys ceph_subsys_mds
2323

24-
void MDSContext::complete(int r) {
24+
void MDSContext::finish(int r) {
2525
MDSRank *mds = get_mds();
2626
ceph_assert(mds != nullptr);
2727
ceph_assert(ceph_mutex_is_locked_by_me(mds->mds_lock));
28-
dout(10) << "MDSContext::complete: " << typeid(*this).name() << dendl;
28+
dout(10) << "MDSContext::finish: " << typeid(*this).name() << dendl;
2929
mds->heartbeat_reset();
30-
return Context::complete(r);
3130
}
3231

3332
void MDSInternalContextWrapper::finish(int r)

src/mds/MDSContext.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ template<template<typename> class A>
4444
using que_alloc = std::deque<MDSContext*, A<MDSContext*>>;
4545
using que = que_alloc<std::allocator>;
4646

47-
void complete(int r) override;
47+
void finish(int r) override;
4848
virtual MDSRank *get_mds() = 0;
4949
};
5050

src/mds/MDSDaemon.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,11 @@ void MDSDaemon::set_up_admin_socket()
544544
asok_hook,
545545
"run cpu profiling on daemon");
546546
ceph_assert(r == 0);
547+
r = admin_socket->register_command(
548+
"dump stray",
549+
asok_hook,
550+
"dump stray folder content");
551+
ceph_assert(r == 0);
547552
}
548553

549554
void MDSDaemon::clean_up_admin_socket()

src/mds/MDSRank.cc

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3070,6 +3070,23 @@ void MDSRankDispatcher::handle_asok_command(
30703070
} else if (command == "quiesce db") {
30713071
command_quiesce_db(cmdmap, on_finish);
30723072
return;
3073+
} else if (command == "dump stray") {
3074+
dout(10) << "dump_stray start" << dendl;
3075+
// the context is a wrapper for formatter to be used while scanning stray dir
3076+
auto context = std::make_unique<MDCache::C_MDS_DumpStrayDirCtx>(mdcache, f,
3077+
[this,on_finish](int r) {
3078+
// completion callback, will be called when scan is done
3079+
dout(10) << "dump_stray done" << dendl;
3080+
bufferlist bl;
3081+
on_finish(r, "", bl);
3082+
});
3083+
std::lock_guard l(mds_lock);
3084+
r = mdcache->stray_status(std::move(context));
3085+
// since the scanning op can be async, we want to know it, for better semantics
3086+
if (r == -EAGAIN) {
3087+
dout(10) << "dump_stray wait" << dendl;
3088+
}
3089+
return;
30733090
} else {
30743091
r = -ENOSYS;
30753092
}
@@ -3506,7 +3523,7 @@ void MDSRank::command_quiesce_path(Formatter* f, const cmdmap_t& cmdmap, asok_fi
35063523

35073524
// This is a little ugly, apologies.
35083525
// We should still be under the mds lock for this test to be valid.
3509-
// MDCache will delete the quiesce_ctx if it manages to complete syncrhonously,
3526+
// MDCache will delete the quiesce_ctx if it manages to complete synchronously,
35103527
// so we are testing the `mdr->internal_op_finish` to see if that has happened
35113528
if (!await && mdr && mdr->internal_op_finish) {
35123529
ceph_assert(mdr->internal_op_finish == quiesce_ctx);

0 commit comments

Comments
 (0)