Skip to content

Commit 658ee6c

Browse files
committed
cephfs_mirror: add labeled replication performance metrics
Fixes: http://tracker.ceph.com/issues/63945 Signed-off-by: Jos Collin <[email protected]> Signed-off-by: Venky Shankar <[email protected]>
1 parent 4c14f14 commit 658ee6c

File tree

8 files changed

+168
-2
lines changed

8 files changed

+168
-2
lines changed

src/common/options/cephfs-mirror.yaml.in

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,15 @@ options:
9191
default: 10
9292
services:
9393
- cephfs-mirror
94-
min: 0
94+
min: 0
95+
- name: cephfs_mirror_perf_stats_prio
96+
type: int
97+
level: advanced
98+
desc: Priority level for mirror daemon replication perf counters
99+
long_desc: The daemon will send perf counter data to the manager daemon if the priority
100+
is not lower than mgr_stats_threshold.
101+
default: 5
102+
services:
103+
- cephfs-mirror
104+
min: 0
105+
max: 11

src/pybind/mgr/mgr_module.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2074,7 +2074,7 @@ def get_latest_avg(self, daemon_type: str, daemon_name: str, counter: str) -> Tu
20742074
@profile_method()
20752075
def get_unlabeled_perf_counters(self, prio_limit: int = PRIO_USEFUL,
20762076
services: Sequence[str] = ("mds", "mon", "osd",
2077-
"rbd-mirror", "rgw",
2077+
"rbd-mirror", "cephfs-mirror", "rgw",
20782078
"tcmu-runner")) -> Dict[str, dict]:
20792079
"""
20802080
Return the perf counters currently known to this ceph-mgr

src/tools/cephfs_mirror/FSMirror.cc

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#include "common/debug.h"
99
#include "common/errno.h"
1010
#include "common/WorkQueue.h"
11+
#include "common/perf_counters.h"
12+
#include "common/perf_counters_key.h"
1113
#include "include/stringify.h"
1214
#include "msg/Messenger.h"
1315
#include "FSMirror.h"
@@ -25,6 +27,14 @@
2527

2628
using namespace std;
2729

30+
// Performance Counters
31+
enum {
32+
l_cephfs_mirror_fs_mirror_first = 5000,
33+
l_cephfs_mirror_fs_mirror_peers,
34+
l_cephfs_mirror_fs_mirror_dir_count,
35+
l_cephfs_mirror_fs_mirror_last,
36+
};
37+
2838
namespace cephfs {
2939
namespace mirror {
3040

@@ -107,6 +117,18 @@ FSMirror::FSMirror(CephContext *cct, const Filesystem &filesystem, uint64_t pool
107117
m_asok_hook(new MirrorAdminSocketHook(cct, filesystem, this)) {
108118
m_service_daemon->add_or_update_fs_attribute(m_filesystem.fscid, SERVICE_DAEMON_DIR_COUNT_KEY,
109119
(uint64_t)0);
120+
121+
std::string labels = ceph::perf_counters::key_create("cephfs_mirror_mirrored_filesystems",
122+
{{"filesystem", m_filesystem.fs_name}});
123+
PerfCountersBuilder plb(m_cct, labels, l_cephfs_mirror_fs_mirror_first,
124+
l_cephfs_mirror_fs_mirror_last);
125+
auto prio = m_cct->_conf.get_val<int64_t>("cephfs_mirror_perf_stats_prio");
126+
plb.add_u64(l_cephfs_mirror_fs_mirror_peers,
127+
"mirroring_peers", "Mirroring Peers", "mpee", prio);
128+
plb.add_u64(l_cephfs_mirror_fs_mirror_dir_count,
129+
"directory_count", "Directory Count", "dirc", prio);
130+
m_perf_counters = plb.create_perf_counters();
131+
m_cct->get_perfcounters_collection()->add(m_perf_counters);
110132
}
111133

112134
FSMirror::~FSMirror() {
@@ -120,6 +142,12 @@ FSMirror::~FSMirror() {
120142
// outside the lock so that in-progress commands can acquire
121143
// lock and finish executing.
122144
delete m_asok_hook;
145+
PerfCounters *perf_counters = nullptr;
146+
std::swap(perf_counters, m_perf_counters);
147+
if (perf_counters != nullptr) {
148+
m_cct->get_perfcounters_collection()->remove(perf_counters);
149+
delete perf_counters;
150+
}
123151
}
124152

125153
int FSMirror::init_replayer(PeerReplayer *peer_replayer) {
@@ -355,6 +383,9 @@ void FSMirror::handle_acquire_directory(string_view dir_path) {
355383
peer_replayer->add_directory(dir_path);
356384
}
357385
}
386+
if (m_perf_counters) {
387+
m_perf_counters->set(l_cephfs_mirror_fs_mirror_dir_count, m_directories.size());
388+
}
358389
}
359390

360391
void FSMirror::handle_release_directory(string_view dir_path) {
@@ -372,6 +403,9 @@ void FSMirror::handle_release_directory(string_view dir_path) {
372403
peer_replayer->remove_directory(dir_path);
373404
}
374405
}
406+
if (m_perf_counters) {
407+
m_perf_counters->set(l_cephfs_mirror_fs_mirror_dir_count, m_directories.size());
408+
}
375409
}
376410
}
377411

@@ -395,6 +429,9 @@ void FSMirror::add_peer(const Peer &peer) {
395429
}
396430
m_peer_replayers.emplace(peer, std::move(replayer));
397431
ceph_assert(m_peer_replayers.size() == 1); // support only a single peer
432+
if (m_perf_counters) {
433+
m_perf_counters->inc(l_cephfs_mirror_fs_mirror_peers);
434+
}
398435
}
399436

400437
void FSMirror::remove_peer(const Peer &peer) {
@@ -415,6 +452,9 @@ void FSMirror::remove_peer(const Peer &peer) {
415452
dout(5) << ": shutting down replayers for peer=" << peer << dendl;
416453
shutdown_replayer(replayer.get());
417454
}
455+
if (m_perf_counters) {
456+
m_perf_counters->dec(l_cephfs_mirror_fs_mirror_peers);
457+
}
418458
}
419459

420460
void FSMirror::mirror_status(Formatter *f) {

src/tools/cephfs_mirror/FSMirror.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ class FSMirror {
154154

155155
MountRef m_mount;
156156

157+
// Labeled perf counters for this filesystem mirror. Assigned at the end of
// the FSMirror constructor; removed from the collection and deleted in the
// destructor. Defaults to nullptr so the `if (m_perf_counters)` guards are
// well-defined even before the constructor body has created the counters.
PerfCounters *m_perf_counters = nullptr;
158+
157159
int init_replayer(PeerReplayer *peer_replayer);
158160
void shutdown_replayer(PeerReplayer *peer_replayer);
159161
void cleanup();

src/tools/cephfs_mirror/Mirror.cc

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "common/errno.h"
1010
#include "common/Timer.h"
1111
#include "common/WorkQueue.h"
12+
#include "common/perf_counters.h"
13+
#include "common/perf_counters_key.h"
1214
#include "include/types.h"
1315
#include "mon/MonClient.h"
1416
#include "msg/Messenger.h"
@@ -20,6 +22,14 @@
2022
#undef dout_prefix
2123
#define dout_prefix *_dout << "cephfs::mirror::Mirror " << __func__
2224

25+
// Performance Counters
26+
enum {
27+
l_cephfs_mirror_first = 4000,
28+
l_cephfs_mirror_file_systems_mirrorred,
29+
l_cephfs_mirror_file_systems_mirror_enable_failures,
30+
l_cephfs_mirror_last,
31+
};
32+
2333
namespace cephfs {
2434
namespace mirror {
2535

@@ -277,6 +287,17 @@ int Mirror::init(std::string &reason) {
277287
return r;
278288
}
279289

290+
std::string labels = ceph::perf_counters::key_create("cephfs_mirror");
291+
PerfCountersBuilder plb(m_cct, labels, l_cephfs_mirror_first, l_cephfs_mirror_last);
292+
293+
auto prio = m_cct->_conf.get_val<int64_t>("cephfs_mirror_perf_stats_prio");
294+
plb.add_u64(l_cephfs_mirror_file_systems_mirrorred,
295+
"mirrored_filesystems", "Filesystems mirrored", "mir", prio);
296+
plb.add_u64_counter(l_cephfs_mirror_file_systems_mirror_enable_failures,
297+
"mirror_enable_failures", "Mirroring enable failures", "mirf", prio);
298+
m_perf_counters = plb.create_perf_counters();
299+
m_cct->get_perfcounters_collection()->add(m_perf_counters);
300+
280301
return 0;
281302
}
282303

@@ -285,6 +306,13 @@ void Mirror::shutdown() {
285306
m_stopping = true;
286307
m_cluster_watcher->shutdown();
287308
m_cond.notify_all();
309+
310+
PerfCounters *perf_counters = nullptr;
311+
std::swap(perf_counters, m_perf_counters);
312+
if (perf_counters != nullptr) {
313+
m_cct->get_perfcounters_collection()->remove(perf_counters);
314+
delete perf_counters;
315+
}
288316
}
289317

290318
void Mirror::reopen_logs() {
@@ -328,6 +356,9 @@ void Mirror::handle_enable_mirroring(const Filesystem &filesystem,
328356
m_service_daemon->add_or_update_fs_attribute(filesystem.fscid,
329357
SERVICE_DAEMON_MIRROR_ENABLE_FAILED_KEY,
330358
true);
359+
if (m_perf_counters) {
360+
m_perf_counters->inc(l_cephfs_mirror_file_systems_mirror_enable_failures);
361+
}
331362
return;
332363
}
333364

@@ -341,6 +372,9 @@ void Mirror::handle_enable_mirroring(const Filesystem &filesystem,
341372
}
342373

343374
dout(10) << ": Initialized FSMirror for filesystem=" << filesystem << dendl;
375+
if (m_perf_counters) {
376+
m_perf_counters->inc(l_cephfs_mirror_file_systems_mirrorred);
377+
}
344378
}
345379

346380
void Mirror::handle_enable_mirroring(const Filesystem &filesystem, int r) {
@@ -358,6 +392,9 @@ void Mirror::handle_enable_mirroring(const Filesystem &filesystem, int r) {
358392
m_service_daemon->add_or_update_fs_attribute(filesystem.fscid,
359393
SERVICE_DAEMON_MIRROR_ENABLE_FAILED_KEY,
360394
true);
395+
if (m_perf_counters) {
396+
m_perf_counters->inc(l_cephfs_mirror_file_systems_mirror_enable_failures);
397+
}
361398
return;
362399
}
363400

@@ -367,6 +404,9 @@ void Mirror::handle_enable_mirroring(const Filesystem &filesystem, int r) {
367404
m_cond.notify_all();
368405

369406
dout(10) << ": Initialized FSMirror for filesystem=" << filesystem << dendl;
407+
if (m_perf_counters) {
408+
m_perf_counters->inc(l_cephfs_mirror_file_systems_mirrorred);
409+
}
370410
}
371411

372412
void Mirror::enable_mirroring(const Filesystem &filesystem, uint64_t local_pool_id,
@@ -422,6 +462,10 @@ void Mirror::handle_disable_mirroring(const Filesystem &filesystem, int r) {
422462
m_mirror_actions.erase(filesystem);
423463
}
424464
}
465+
466+
if (m_perf_counters) {
467+
m_perf_counters->dec(l_cephfs_mirror_file_systems_mirrorred);
468+
}
425469
}
426470

427471
void Mirror::disable_mirroring(const Filesystem &filesystem, Context *on_finish) {

src/tools/cephfs_mirror/Mirror.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ class Mirror {
104104
RadosRef m_local;
105105
std::unique_ptr<ServiceDaemon> m_service_daemon;
106106

107+
// Daemon-wide perf counters. Created near the end of Mirror::init() and
// released in Mirror::shutdown(). Must default to nullptr: init() can return
// early on error before the counters are built, and shutdown() as well as
// the enable/disable handlers test this pointer before using it.
PerfCounters *m_perf_counters = nullptr;
108+
107109
int init_mon_client();
108110

109111
// called via listener

src/tools/cephfs_mirror/PeerReplayer.cc

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#include "common/ceph_context.h"
1313
#include "common/debug.h"
1414
#include "common/errno.h"
15+
#include "common/perf_counters.h"
16+
#include "common/perf_counters_key.h"
1517
#include "FSMirror.h"
1618
#include "PeerReplayer.h"
1719
#include "Utils.h"
@@ -26,6 +28,18 @@
2628

2729
using namespace std;
2830

31+
// Performance Counters
32+
enum {
33+
l_cephfs_mirror_peer_replayer_first = 6000,
34+
l_cephfs_mirror_peer_replayer_snaps_synced,
35+
l_cephfs_mirror_peer_replayer_snaps_deleted,
36+
l_cephfs_mirror_peer_replayer_snaps_renamed,
37+
l_cephfs_mirror_peer_replayer_snap_sync_failures,
38+
l_cephfs_mirror_peer_replayer_avg_sync_time,
39+
l_cephfs_mirror_peer_replayer_sync_bytes,
40+
l_cephfs_mirror_peer_replayer_last,
41+
};
42+
2943
namespace cephfs {
3044
namespace mirror {
3145

@@ -161,10 +175,39 @@ PeerReplayer::PeerReplayer(CephContext *cct, FSMirror *fs_mirror,
161175
SERVICE_DAEMON_FAILED_DIR_COUNT_KEY, (uint64_t)0);
162176
m_service_daemon->add_or_update_peer_attribute(m_filesystem.fscid, m_peer,
163177
SERVICE_DAEMON_RECOVERED_DIR_COUNT_KEY, (uint64_t)0);
178+
179+
std::string labels = ceph::perf_counters::key_create("cephfs_mirror_peers",
180+
{{"source_fscid", stringify(m_filesystem.fscid)},
181+
{"source_filesystem", m_filesystem.fs_name},
182+
{"peer_cluster_name", m_peer.remote.cluster_name},
183+
{"peer_cluster_filesystem", m_peer.remote.fs_name}});
184+
PerfCountersBuilder plb(m_cct, labels, l_cephfs_mirror_peer_replayer_first,
185+
l_cephfs_mirror_peer_replayer_last);
186+
auto prio = m_cct->_conf.get_val<int64_t>("cephfs_mirror_perf_stats_prio");
187+
plb.add_u64_counter(l_cephfs_mirror_peer_replayer_snaps_synced,
188+
"snaps_synced", "Snapshots Synchronized", "sync", prio);
189+
plb.add_u64_counter(l_cephfs_mirror_peer_replayer_snaps_deleted,
190+
"snaps_deleted", "Snapshots Deleted", "del", prio);
191+
plb.add_u64_counter(l_cephfs_mirror_peer_replayer_snaps_renamed,
192+
"snaps_renamed", "Snapshots Renamed", "ren", prio);
193+
plb.add_u64_counter(l_cephfs_mirror_peer_replayer_snap_sync_failures,
194+
"sync_failures", "Snapshot Sync Failures", "fail", prio);
195+
plb.add_time_avg(l_cephfs_mirror_peer_replayer_avg_sync_time,
196+
"avg_sync_time", "Average Sync Time", "asyn", prio);
197+
plb.add_u64_counter(l_cephfs_mirror_peer_replayer_sync_bytes,
198+
"sync_bytes", "Sync Bytes", "sbye", prio);
199+
m_perf_counters = plb.create_perf_counters();
200+
m_cct->get_perfcounters_collection()->add(m_perf_counters);
164201
}
165202

166203
PeerReplayer::~PeerReplayer() {
167204
delete m_asok_hook;
205+
PerfCounters *perf_counters = nullptr;
206+
std::swap(perf_counters, m_perf_counters);
207+
if (perf_counters != nullptr) {
208+
m_cct->get_perfcounters_collection()->remove(perf_counters);
209+
delete perf_counters;
210+
}
168211
}
169212

170213
int PeerReplayer::init() {
@@ -516,6 +559,9 @@ int PeerReplayer::propagate_snap_deletes(const std::string &dir_root,
516559
return r;
517560
}
518561
inc_deleted_snap(dir_root);
562+
if (m_perf_counters) {
563+
m_perf_counters->inc(l_cephfs_mirror_peer_replayer_snaps_deleted);
564+
}
519565
}
520566

521567
return 0;
@@ -539,6 +585,9 @@ int PeerReplayer::propagate_snap_renames(
539585
return r;
540586
}
541587
inc_renamed_snap(dir_root);
588+
if (m_perf_counters) {
589+
m_perf_counters->inc(l_cephfs_mirror_peer_replayer_snaps_renamed);
590+
}
542591
}
543592

544593
return 0;
@@ -694,6 +743,9 @@ int PeerReplayer::remote_file_op(const std::string &dir_root, const std::string
694743
derr << ": failed to copy path=" << epath << ": " << cpp_strerror(r) << dendl;
695744
return r;
696745
}
746+
if (m_perf_counters) {
747+
m_perf_counters->inc(l_cephfs_mirror_peer_replayer_sync_bytes, stx.stx_size);
748+
}
697749
} else if (S_ISLNK(stx.stx_mode)) {
698750
// free the remote link before relinking
699751
r = ceph_unlinkat(m_remote_mount, fh.r_fd_dir_root, epath.c_str(), 0);
@@ -1457,7 +1509,17 @@ int PeerReplayer::do_sync_snaps(const std::string &dir_root) {
14571509
clear_current_syncing_snap(dir_root);
14581510
return r;
14591511
}
1512+
if (m_perf_counters) {
1513+
m_perf_counters->inc(l_cephfs_mirror_peer_replayer_snaps_synced);
1514+
}
14601515
std::chrono::duration<double> duration = clock::now() - start;
1516+
1517+
utime_t d;
1518+
d.set_from_double(duration.count());
1519+
if (m_perf_counters) {
1520+
m_perf_counters->tinc(l_cephfs_mirror_peer_replayer_avg_sync_time, d);
1521+
}
1522+
14611523
set_last_synced_stat(dir_root, it->first, it->second, duration.count());
14621524
if (--snaps_per_cycle == 0) {
14631525
break;
@@ -1481,6 +1543,9 @@ void PeerReplayer::sync_snaps(const std::string &dir_root,
14811543
locker.lock();
14821544
if (r < 0) {
14831545
_inc_failed_count(dir_root);
1546+
if (m_perf_counters) {
1547+
m_perf_counters->inc(l_cephfs_mirror_peer_replayer_snap_sync_failures);
1548+
}
14841549
} else {
14851550
_reset_failed_count(dir_root);
14861551
}

src/tools/cephfs_mirror/PeerReplayer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ class PeerReplayer {
269269

270270
ServiceDaemonStats m_service_daemon_stats;
271271

272+
// Labeled per-peer replication perf counters. Assigned at the end of the
// PeerReplayer constructor; removed from the collection and deleted in the
// destructor. Defaults to nullptr so the `if (m_perf_counters)` guards on
// the sync paths never read an indeterminate pointer.
PerfCounters *m_perf_counters = nullptr;
273+
272274
void run(SnapshotReplayerThread *replayer);
273275

274276
boost::optional<std::string> pick_directory();

0 commit comments

Comments
 (0)