Skip to content

Commit c8053b1

Browse files
committed
Merge PR ceph#53503 into main
* refs/pull/53503/head: qa: add tests for `mds last-seen` command doc/cephfs: add documentation for `mds last-seen` PendingReleaseNotes: add note on last-seen command mon/MDSMonitor: add command to lookup when mds was last seen mon/MDSMonitor: set birth time on FSMap during encode pybind/mgr/dashboard: show context diff for openapi check Reviewed-by: Venky Shankar <[email protected]>
2 parents a338e04 + edc584a commit c8053b1

File tree

13 files changed

+287
-2
lines changed

13 files changed

+287
-2
lines changed

PendingReleaseNotes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
a large buildup of session metadata resulting in the MDS going read-only due to
4040
the RADOS operation exceeding the size threshold. `mds_session_metadata_threshold`
4141
config controls the maximum size that a (encoded) session metadata can grow.
42+
* CephFS: A new "mds last-seen" command is available for querying the last time
43+
an MDS was in the FSMap, subject to a pruning threshold.
4244
* CephFS: For clusters with multiple CephFS file systems, all the snap-schedule
4345
commands now expect the '--fs' argument.
4446
* CephFS: The period specifier ``m`` now implies minutes and the period specifier

doc/cephfs/administration.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,17 @@ Mark the file system rank as repaired. Unlike the name suggests, this command
280280
does not change a MDS; it manipulates the file system rank which has been
281281
marked damaged.
282282

283+
::
284+
285+
ceph mds last-seen <name>
286+
287+
Learn when the MDS named ``name`` was last in the FSMap. The JSON output
288+
includes the epoch the MDS was last seen. Historical information is limited by
289+
the following ``mon`` configuration:
290+
291+
292+
.. confval:: mon_fsmap_prune_threshold
293+
283294

284295
Required Client Features
285296
------------------------

qa/tasks/cephfs/test_admin.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import uuid
55
from io import StringIO
66
from os.path import join as os_path_join
7+
import re
78
from time import sleep
89

910
from teuthology.exceptions import CommandFailedError
@@ -196,6 +197,108 @@ def wait_till_health_warn(self, health_warn, active_mds_id, sleep=3,
196197
return
197198

198199

200+
class TestMdsLastSeen(CephFSTestCase):
201+
"""
202+
Tests for `mds last-seen` command.
203+
"""
204+
205+
MDSS_REQUIRED = 2
206+
207+
def test_in_text(self):
208+
"""
209+
That `mds last-seen` returns 0 for an MDS currently in the map.
210+
"""
211+
212+
status = self.fs.status()
213+
r0 = self.fs.get_rank(0, status=status)
214+
s = self.get_ceph_cmd_stdout("mds", "last-seen", r0['name'])
215+
seconds = int(re.match(r"^(\d+)s$", s).group(1))
216+
self.assertEqual(seconds, 0)
217+
218+
def test_in_json(self):
219+
"""
220+
That `mds last-seen` returns 0 for an MDS currently in the map.
221+
"""
222+
223+
status = self.fs.status()
224+
r0 = self.fs.get_rank(0, status=status)
225+
s = self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", r0['name'])
226+
J = json.loads(s)
227+
seconds = int(re.match(r"^(\d+)s$", J['last-seen']).group(1))
228+
self.assertEqual(seconds, 0)
229+
230+
def test_unknown(self):
231+
"""
232+
That `mds last-seen` returns ENOENT for an mds not in recent maps.
233+
"""
234+
235+
try:
236+
self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", 'foo')
237+
except CommandFailedError as e:
238+
self.assertEqual(e.exitstatus, errno.ENOENT)
239+
else:
240+
self.fail("non-existent mds should fail ENOENT")
241+
242+
def test_standby(self):
243+
"""
244+
That `mds last-seen` returns 0 for a standby.
245+
"""
246+
247+
status = self.fs.status()
248+
for info in status.get_standbys():
249+
s = self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", info['name'])
250+
J = json.loads(s)
251+
seconds = int(re.match(r"^(\d+)s$", J['last-seen']).group(1))
252+
self.assertEqual(seconds, 0)
253+
254+
def test_stopped(self):
255+
"""
256+
That `mds last-seen` returns >0 for mds that is stopped.
257+
"""
258+
259+
status = self.fs.status()
260+
r0 = self.fs.get_rank(0, status=status)
261+
self.fs.mds_stop(mds_id=r0['name'])
262+
self.fs.rank_fail()
263+
sleep(2)
264+
with safe_while(sleep=1, tries=self.fs.beacon_timeout, action='wait for last-seen >0') as proceed:
265+
while proceed():
266+
s = self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", r0['name'])
267+
J = json.loads(s)
268+
seconds = int(re.match(r"^(\d+)s$", J['last-seen']).group(1))
269+
if seconds == 0:
270+
continue
271+
self.assertGreater(seconds, 1)
272+
break
273+
274+
def test_gc(self):
275+
"""
276+
That historical mds information is eventually garbage collected.
277+
"""
278+
279+
prune_time = 20
280+
sleep_time = 2
281+
self.config_set('mon', 'mon_fsmap_prune_threshold', prune_time)
282+
status = self.fs.status()
283+
r0 = self.fs.get_rank(0, status=status)
284+
self.fs.mds_stop(mds_id=r0['name'])
285+
self.fs.rank_fail()
286+
last = 0
287+
for i in range(prune_time):
288+
sleep(sleep_time) # we will sleep twice prune_time
289+
try:
290+
s = self.get_ceph_cmd_stdout("--format=json", "mds", "last-seen", r0['name'])
291+
J = json.loads(s)
292+
seconds = int(re.match(r"^(\d+)s$", J['last-seen']).group(1))
293+
self.assertGreater(seconds, last)
294+
log.debug("last_seen: %ds", seconds)
295+
last = seconds
296+
except CommandFailedError as e:
297+
self.assertEqual(e.exitstatus, errno.ENOENT)
298+
self.assertGreaterEqual(last + sleep_time + 1, prune_time) # rounding error add 1
299+
return
300+
self.fail("map was no garbage collected as expected")
301+
199302
@classhook('_add_valid_tell')
200303
class TestValidTell(TestAdminCommands):
201304
@classmethod

qa/tasks/mgr/dashboard/test_health.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ def test_full_health(self):
186186
})
187187
}),
188188
'fs_map': JObj({
189+
'btime': str,
189190
'compat': JObj({
190191
'compat': JObj({}, allow_unknown=True, unknown_schema=str),
191192
'incompat': JObj(

src/common/options/mon.yaml.in

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,18 @@ options:
778778
services:
779779
- mon
780780
with_legacy: true
781+
- name: mon_fsmap_prune_threshold
782+
type: secs
783+
level: advanced
784+
desc: prune fsmap older than this threshold in seconds
785+
fmt_desc: The monitors keep historical fsmaps in memory to optimize asking
786+
when an MDS daemon was last seen in the FSMap. This option controls
787+
how far back in time the monitors will look.
788+
default: 300
789+
flags:
790+
- runtime
791+
services:
792+
- mon
781793
- name: mds_beacon_mon_down_grace
782794
type: secs
783795
level: advanced

src/mds/FSMap.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ void Filesystem::dump(Formatter *f) const
135135
void FSMap::dump(Formatter *f) const
136136
{
137137
f->dump_int("epoch", epoch);
138+
f->dump_string("btime", fmt::format("{}", btime));
138139
// Use 'default' naming to match 'set-default' CLI
139140
f->dump_int("default_fscid", legacy_client_fscid);
140141

@@ -168,6 +169,7 @@ void FSMap::dump(Formatter *f) const
168169
FSMap &FSMap::operator=(const FSMap &rhs)
169170
{
170171
epoch = rhs.epoch;
172+
btime = rhs.btime;
171173
next_filesystem_id = rhs.next_filesystem_id;
172174
legacy_client_fscid = rhs.legacy_client_fscid;
173175
default_compat = rhs.default_compat;
@@ -206,6 +208,7 @@ void FSMap::generate_test_instances(std::list<FSMap*>& ls)
206208
void FSMap::print(ostream& out) const
207209
{
208210
out << "e" << epoch << std::endl;
211+
out << "btime " << fmt::format("{}", btime) << std::endl;
209212
out << "enable_multiple, ever_enabled_multiple: " << enable_multiple << ","
210213
<< ever_enabled_multiple << std::endl;
211214
out << "default compat: " << default_compat << std::endl;
@@ -296,6 +299,7 @@ void FSMap::print_summary(Formatter *f, ostream *out) const
296299
{
297300
if (f) {
298301
f->dump_unsigned("epoch", get_epoch());
302+
f->dump_string("btime", fmt::format("{}", btime));
299303
for (const auto& [fscid, fs] : filesystems) {
300304
f->dump_unsigned("id", fscid);
301305
f->dump_unsigned("up", fs.mds_map.up.size());
@@ -643,6 +647,7 @@ void FSMap::encode(bufferlist& bl, uint64_t features) const
643647
encode(standby_daemons, bl, features);
644648
encode(standby_epochs, bl);
645649
encode(ever_enabled_multiple, bl);
650+
encode(btime, bl);
646651
ENCODE_FINISH(bl);
647652
}
648653

@@ -674,6 +679,9 @@ void FSMap::decode(bufferlist::const_iterator& p)
674679
if (struct_v >= 7) {
675680
decode(ever_enabled_multiple, p);
676681
}
682+
if (struct_v >= 8) {
683+
decode(btime, p);
684+
}
677685
DECODE_FINISH(p);
678686
}
679687

src/mds/FSMap.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <errno.h>
2727

2828
#include "include/types.h"
29+
#include "common/ceph_time.h"
2930
#include "common/Clock.h"
3031
#include "mds/MDSMap.h"
3132

@@ -268,19 +269,21 @@ WRITE_CLASS_ENCODER_FEATURES(Filesystem)
268269

269270
class FSMap {
270271
public:
272+
using real_clock = ceph::real_clock;
271273
using mds_info_t = MDSMap::mds_info_t;
272274
using fsmap = typename std::map<fs_cluster_id_t, Filesystem>;
273275
using const_iterator = typename fsmap::const_iterator;
274276
using iterator = typename fsmap::iterator;
275277

276-
static const version_t STRUCT_VERSION = 7;
278+
static const version_t STRUCT_VERSION = 8;
277279
static const version_t STRUCT_VERSION_TRIM_TO = 7;
278280

279281
FSMap() : default_compat(MDSMap::get_compat_set_default()) {}
280282

281283
FSMap(const FSMap &rhs)
282284
:
283285
epoch(rhs.epoch),
286+
btime(rhs.btime),
284287
next_filesystem_id(rhs.next_filesystem_id),
285288
legacy_client_fscid(rhs.legacy_client_fscid),
286289
default_compat(rhs.default_compat),
@@ -584,6 +587,13 @@ class FSMap {
584587
epoch_t get_epoch() const { return epoch; }
585588
void inc_epoch() { epoch++; }
586589

590+
void set_btime() {
591+
btime = real_clock::now();
592+
}
593+
auto get_btime() const {
594+
return btime;
595+
}
596+
587597
version_t get_struct_version() const { return struct_version; }
588598
bool is_struct_old() const {
589599
return struct_version < STRUCT_VERSION_TRIM_TO;
@@ -676,6 +686,8 @@ class FSMap {
676686
}
677687

678688
epoch_t epoch = 0;
689+
ceph::real_time btime = real_clock::zero();
690+
679691
uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1;
680692
fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE;
681693
CompatSet default_compat;

src/mon/MDSMonitor.cc

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <sstream>
1717
#include <queue>
1818
#include <ranges>
19+
#include <boost/range/adaptors.hpp>
1920
#include <boost/utility.hpp>
2021

2122
#include "MDSMonitor.h"
@@ -238,6 +239,7 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t)
238239
if (!g_conf()->mon_mds_skip_sanity) {
239240
pending.sanity(true);
240241
}
242+
pending.set_btime();
241243

242244
// apply to paxos
243245
ceph_assert(get_last_committed() + 1 == pending.get_epoch());
@@ -1024,6 +1026,52 @@ bool MDSMonitor::preprocess_command(MonOpRequestRef op)
10241026
ds << fsmap;
10251027
}
10261028
r = 0;
1029+
} else if (prefix == "mds last-seen") {
1030+
std::string id;
1031+
cmd_getval(cmdmap, "id", id);
1032+
1033+
dout(10) << "last seen check for " << id << dendl;
1034+
1035+
auto& history = get_fsmap_history();
1036+
auto now = real_clock::now();
1037+
bool found = false;
1038+
/* Special case:
1039+
* If the mons consider the MDS "in" the latest FSMap, then the mds
1040+
* is always "last seen" **now** (for the purposes of this API). We
1041+
* don't look at past beacons because that is only managed by the
1042+
* leader and the logic is fudged in places in the event of suspected
1043+
* network partitions.
1044+
*/
1045+
std::chrono::seconds since = std::chrono::seconds(0);
1046+
1047+
for (auto& [epoch, fsmaph] : boost::adaptors::reverse(history)) {
1048+
dout(25) << "looking at epoch " << epoch << dendl;
1049+
auto* info = fsmaph.find_by_name(id);
1050+
if (info) {
1051+
dout(10) << "found: " << *info << dendl;
1052+
found = true;
1053+
if (f) {
1054+
f->open_object_section("mds last-seen");
1055+
f->dump_object("info", *info);
1056+
f->dump_string("last-seen", fmt::format("{}", since));
1057+
f->dump_int("epoch", epoch);
1058+
f->close_section();
1059+
f->flush(ds);
1060+
} else {
1061+
ds << fmt::format("{}", since);
1062+
}
1063+
break;
1064+
}
1065+
/* History is walked newest-to-oldest: if the MDS appears in the next (older) epoch visited, then it went away as of this epoch's btime.
1066+
*/
1067+
since = std::chrono::duration_cast<std::chrono::seconds>(now - fsmaph.get_btime());
1068+
}
1069+
if (found) {
1070+
r = 0;
1071+
} else {
1072+
ss << "mds " << id << " not found in recent FSMaps";
1073+
r = -ENOENT;
1074+
}
10271075
} else if (prefix == "mds ok-to-stop") {
10281076
vector<string> ids;
10291077
if (!cmd_getval(cmdmap, "ids", ids)) {
@@ -2379,6 +2427,39 @@ bool MDSMonitor::maybe_promote_standby(FSMap &fsmap, const Filesystem& fs)
23792427

23802428
void MDSMonitor::tick()
23812429
{
2430+
{
2431+
auto _history_prune_time = g_conf().get_val<std::chrono::seconds>("mon_fsmap_prune_threshold");
2432+
set_fsmap_history_threshold(_history_prune_time);
2433+
dout(20) << _history_prune_time << dendl;
2434+
prune_fsmap_history();
2435+
auto& history = get_fsmap_history();
2436+
auto now = real_clock::now();
2437+
if (auto it = history.begin(); it != history.end()) {
2438+
auto start = it->second.get_epoch();
2439+
dout(20) << "oldest epoch in history is " << start << dendl;
2440+
for (;;) {
2441+
--start;
2442+
bufferlist bl;
2443+
FSMap fsmaph;
2444+
int err = get_version(start, bl);
2445+
if (err == -ENOENT) {
2446+
break;
2447+
}
2448+
ceph_assert(err == 0);
2449+
ceph_assert(bl.length());
2450+
fsmaph.decode(bl);
2451+
auto btime = fsmaph.get_btime();
2452+
auto since = std::chrono::duration_cast<std::chrono::milliseconds>(now - btime);
2453+
dout(20) << "loaded epoch " << fsmaph.get_epoch() << " which is " << since << " old" << dendl;
2454+
if (since <= _history_prune_time) {
2455+
put_fsmap_history(fsmaph);
2456+
} else {
2457+
break;
2458+
}
2459+
}
2460+
}
2461+
}
2462+
23822463
if (!is_active() || !is_leader()) return;
23832464

23842465
auto &pending = get_pending_fsmap_writeable();

src/mon/MonCommands.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,9 @@ COMMAND("versions",
298298

299299
#define FS_NAME_GOODCHARS "[A-Za-z0-9-_.]"
300300
COMMAND_WITH_FLAG("mds stat", "show MDS status", "mds", "r", FLAG(HIDDEN))
301+
COMMAND("mds last-seen name=id,type=CephString,req=true",
302+
"fetch metadata for mds <id>",
303+
"mds", "r")
301304
COMMAND("fs dump "
302305
"name=epoch,type=CephInt,req=false,range=0",
303306
"dump all CephFS status, optionally from epoch", "mds", "r")

0 commit comments

Comments
 (0)