44#include < boost/range/adaptor/map.hpp>
55#include < boost/range/algorithm/copy.hpp>
66
7+ #include " common/ceph_context.h"
8+ #include " common/perf_counters_key.h"
9+
710#include " MDSRank.h"
811#include " MetricAggregator.h"
912#include " mgr/MgrClient.h"
1316#undef dout_prefix
1417#define dout_prefix *_dout << " mds.metric.aggregator" << " " << __func__
1518
// Performance Counters
//
// Aggregator-wide (per-MDS-rank) labeled perf counters, registered under the
// "mds_client_metrics" key in MetricAggregator::init(). The start/last
// sentinels bracket the valid counter indices for PerfCountersBuilder.
enum {
  l_mds_client_metrics_start = 10000,
  l_mds_client_metrics_num_clients,  // number of connected client sessions
  l_mds_client_metrics_last
};
25+
// Per-client labeled perf counters, registered per (client, rank) pair in
// refresh_metrics_for_rank() under the "mds_client_metrics-<fs_name>" key and
// removed again in remove_metrics_for_rank(). The start/last sentinels
// bracket the valid counter indices for PerfCountersBuilder.
enum {
  l_mds_per_client_metrics_start = 20000,
  l_mds_per_client_metrics_cap_hits,             // capability cache hits
  l_mds_per_client_metrics_cap_misses,           // capability cache misses
  l_mds_per_client_metrics_avg_read_latency,     // mean read latency (time counter)
  l_mds_per_client_metrics_avg_write_latency,    // mean write latency (time counter)
  l_mds_per_client_metrics_avg_metadata_latency, // mean metadata op latency (time counter)
  l_mds_per_client_metrics_dentry_lease_hits,
  l_mds_per_client_metrics_dentry_lease_misses,
  l_mds_per_client_metrics_opened_files,
  l_mds_per_client_metrics_opened_inodes,
  l_mds_per_client_metrics_pinned_icaps,         // pinned inode capabilities
  l_mds_per_client_metrics_total_inodes,
  l_mds_per_client_metrics_total_read_ops,
  l_mds_per_client_metrics_total_read_size,
  l_mds_per_client_metrics_total_write_ops,
  l_mds_per_client_metrics_total_write_size,
  l_mds_per_client_metrics_last
};
45+
1646MetricAggregator::MetricAggregator (CephContext *cct, MDSRank *mds, MgrClient *mgrc)
1747 : Dispatcher(cct),
48+ m_cct(cct),
1849 mds(mds),
1950 mgrc(mgrc),
2051 mds_pinger(mds) {
@@ -32,6 +63,15 @@ void MetricAggregator::ping_all_active_ranks() {
3263int MetricAggregator::init () {
3364 dout (10 ) << dendl;
3465
66+ std::string labels = ceph::perf_counters::key_create (" mds_client_metrics" ,
67+ {{" fs_name" , mds->mdsmap ->get_fs_name ()},
68+ {" id" , stringify (mds->get_global_id ())}});
69+ PerfCountersBuilder plb (m_cct, labels, l_mds_client_metrics_start, l_mds_client_metrics_last);
70+ plb.add_u64 (l_mds_client_metrics_num_clients,
71+ " num_clients" , " Numer of client sessions" , " mcli" , PerfCountersBuilder::PRIO_CRITICAL);
72+ m_perf_counters = plb.create_perf_counters ();
73+ m_cct->get_perfcounters_collection ()->add (m_perf_counters);
74+
3575 pinger = std::thread ([this ]() {
3676 std::unique_lock locker (lock);
3777 while (!stopping) {
@@ -61,6 +101,24 @@ void MetricAggregator::shutdown() {
61101 std::scoped_lock locker (lock);
62102 ceph_assert (!stopping);
63103 stopping = true ;
104+
105+ // dealloc per-client perf counter
106+ for (auto [crpair, pc] : client_perf_counters) {
107+ PerfCounters *perf_counters = nullptr ;
108+ std::swap (perf_counters, pc);
109+ if (perf_counters != nullptr ) {
110+ m_cct->get_perfcounters_collection ()->remove (perf_counters);
111+ delete perf_counters;
112+ }
113+ }
114+ client_perf_counters.clear ();
115+
116+ PerfCounters *perf_counters = nullptr ;
117+ std::swap (perf_counters, m_perf_counters);
118+ if (perf_counters != nullptr ) {
119+ m_cct->get_perfcounters_collection ()->remove (perf_counters);
120+ delete perf_counters;
121+ }
64122 }
65123
66124 if (pinger.joinable ()) {
@@ -97,10 +155,110 @@ void MetricAggregator::refresh_metrics_for_rank(const entity_inst_t &client,
97155 << metrics << dendl;
98156
99157 auto &p = clients_by_rank.at (rank);
158+ auto crpair = std::make_pair (client, rank);
100159 bool ins = p.insert (client).second ;
101160 if (ins) {
102161 dout (20 ) << " : rank=" << rank << " has " << p.size () << " connected"
103162 << " client(s)" << dendl;
163+ if (m_perf_counters) {
164+ m_perf_counters->inc (l_mds_client_metrics_num_clients);
165+ }
166+
167+ std::string labels = ceph::perf_counters::key_create (" mds_client_metrics-" + std::string (mds->mdsmap ->get_fs_name ()),
168+ {{" client" , stringify (client.name )},
169+ {" rank" , stringify (rank)}});
170+ PerfCountersBuilder plb (m_cct, labels, l_mds_per_client_metrics_start, l_mds_per_client_metrics_last);
171+ plb.add_u64 (l_mds_per_client_metrics_cap_hits,
172+ " cap_hits" , " Capability hits" , " hcap" , PerfCountersBuilder::PRIO_CRITICAL);
173+ plb.add_u64 (l_mds_per_client_metrics_cap_misses,
174+ " cap_miss" , " Capability misses" , " mcap" , PerfCountersBuilder::PRIO_CRITICAL);
175+ plb.add_time (l_mds_per_client_metrics_avg_read_latency,
176+ " avg_read_latency" , " Average Read Latency" , " arlt" , PerfCountersBuilder::PRIO_CRITICAL);
177+ plb.add_time (l_mds_per_client_metrics_avg_write_latency,
178+ " avg_write_latency" , " Average Write Latency" , " awlt" , PerfCountersBuilder::PRIO_CRITICAL);
179+ plb.add_time (l_mds_per_client_metrics_avg_metadata_latency,
180+ " avg_metadata_latency" , " Average Metadata Latency" , " amlt" , PerfCountersBuilder::PRIO_CRITICAL);
181+ plb.add_u64 (l_mds_per_client_metrics_dentry_lease_hits,
182+ " dentry_lease_hits" , " Dentry Lease Hits" , " hden" , PerfCountersBuilder::PRIO_CRITICAL);
183+ plb.add_u64 (l_mds_per_client_metrics_dentry_lease_misses,
184+ " dentry_lease_miss" , " Dentry Lease Misses" , " mden" , PerfCountersBuilder::PRIO_CRITICAL);
185+ plb.add_u64 (l_mds_per_client_metrics_opened_files,
186+ " opened_files" , " Open Files" , " ofil" , PerfCountersBuilder::PRIO_CRITICAL);
187+ plb.add_u64 (l_mds_per_client_metrics_opened_inodes,
188+ " opened_inodes" , " Open Inodes" , " oino" , PerfCountersBuilder::PRIO_CRITICAL);
189+ plb.add_u64 (l_mds_per_client_metrics_pinned_icaps,
190+ " pinned_icaps" , " Pinned Inode Caps" , " pino" , PerfCountersBuilder::PRIO_CRITICAL);
191+ plb.add_u64 (l_mds_per_client_metrics_total_inodes,
192+ " total_inodes" , " Total Inodes" , " tino" , PerfCountersBuilder::PRIO_CRITICAL);
193+ plb.add_u64 (l_mds_per_client_metrics_total_read_ops,
194+ " total_read_ops" , " Total Read Operations" , " rops" , PerfCountersBuilder::PRIO_CRITICAL);
195+ plb.add_u64 (l_mds_per_client_metrics_total_read_size,
196+ " total_read_size" , " Total Read Size" , " rsiz" , PerfCountersBuilder::PRIO_CRITICAL);
197+ plb.add_u64 (l_mds_per_client_metrics_total_write_ops,
198+ " total_write_ops" , " Total Write Operations" , " wops" , PerfCountersBuilder::PRIO_CRITICAL);
199+ plb.add_u64 (l_mds_per_client_metrics_total_write_size,
200+ " total_write_size" , " Total Write Size" , " wsiz" , PerfCountersBuilder::PRIO_CRITICAL);
201+ client_perf_counters[crpair] = plb.create_perf_counters ();
202+ m_cct->get_perfcounters_collection ()->add (client_perf_counters[crpair]);
203+ }
204+
205+ // update perf counters
206+ PerfCounters *perf_counter_ptr = nullptr ;
207+ if (client_perf_counters.contains (crpair)) {
208+ perf_counter_ptr = client_perf_counters[crpair];
209+ }
210+
211+ if (perf_counter_ptr) {
212+ // client capability hit ratio
213+ perf_counter_ptr->set (l_mds_per_client_metrics_cap_hits, metrics.cap_hit_metric .hits );
214+ perf_counter_ptr->set (l_mds_per_client_metrics_cap_misses, metrics.cap_hit_metric .misses );
215+
216+ // some averages
217+ if (metrics.read_latency_metric .updated ) {
218+ utime_t ravg (metrics.read_latency_metric .mean .tv .tv_sec * 100 ,
219+ metrics.read_latency_metric .mean .tv .tv_nsec / 1000000 );
220+ perf_counter_ptr->tset (l_mds_per_client_metrics_avg_read_latency, ravg);
221+ }
222+ if (metrics.write_latency_metric .updated ) {
223+ utime_t wavg (metrics.write_latency_metric .mean .tv .tv_sec * 100 ,
224+ metrics.write_latency_metric .mean .tv .tv_nsec / 1000000 );
225+ perf_counter_ptr->set (l_mds_per_client_metrics_avg_write_latency, wavg);
226+ }
227+ if (metrics.metadata_latency_metric .updated ) {
228+ utime_t mavg (metrics.metadata_latency_metric .mean .tv .tv_sec * 100 ,
229+ metrics.metadata_latency_metric .mean .tv .tv_nsec / 1000000 );
230+ perf_counter_ptr->set (l_mds_per_client_metrics_avg_metadata_latency, mavg);
231+ }
232+
233+ // dentry leases
234+ if (metrics.dentry_lease_metric .updated ) {
235+ perf_counter_ptr->set (l_mds_per_client_metrics_dentry_lease_hits, metrics.dentry_lease_metric .hits );
236+ perf_counter_ptr->set (l_mds_per_client_metrics_dentry_lease_misses, metrics.dentry_lease_metric .misses );
237+ }
238+
239+ // file+inode opens, pinned inode caps
240+ if (metrics.opened_files_metric .updated ) {
241+ perf_counter_ptr->set (l_mds_per_client_metrics_opened_files, metrics.opened_files_metric .opened_files );
242+ perf_counter_ptr->set (l_mds_per_client_metrics_total_inodes, metrics.opened_files_metric .total_inodes );
243+ }
244+ if (metrics.opened_inodes_metric .updated ) {
245+ perf_counter_ptr->set (l_mds_per_client_metrics_opened_inodes, metrics.opened_inodes_metric .total_inodes );
246+ perf_counter_ptr->set (l_mds_per_client_metrics_total_inodes, metrics.opened_inodes_metric .total_inodes );
247+ }
248+ if (metrics.pinned_icaps_metric .updated ) {
249+ perf_counter_ptr->set (l_mds_per_client_metrics_pinned_icaps, metrics.pinned_icaps_metric .pinned_icaps );
250+ perf_counter_ptr->set (l_mds_per_client_metrics_total_inodes, metrics.pinned_icaps_metric .total_inodes );
251+ }
252+
253+ // read+write io metrics
254+ if (metrics.read_io_sizes_metric .updated ) {
255+ perf_counter_ptr->set (l_mds_per_client_metrics_total_read_ops, metrics.read_io_sizes_metric .total_ops );
256+ perf_counter_ptr->set (l_mds_per_client_metrics_total_read_size, metrics.read_io_sizes_metric .total_size );
257+ }
258+ if (metrics.write_io_sizes_metric .updated ) {
259+ perf_counter_ptr->set (l_mds_per_client_metrics_total_write_ops, metrics.write_io_sizes_metric .total_ops );
260+ perf_counter_ptr->set (l_mds_per_client_metrics_total_write_size, metrics.write_io_sizes_metric .total_size );
261+ }
104262 }
105263
106264 auto update_counter_func = [&metrics](const MDSPerformanceCounterDescriptor &d,
@@ -260,6 +418,13 @@ void MetricAggregator::remove_metrics_for_rank(const entity_inst_t &client,
260418 ceph_assert (rm);
261419 dout (20 ) << " : rank=" << rank << " has " << p.size () << " connected"
262420 << " client(s)" << dendl;
421+ auto crpair = std::make_pair (client, rank);
422+ m_cct->get_perfcounters_collection ()->remove (client_perf_counters[crpair]);
423+ delete client_perf_counters[crpair];
424+ client_perf_counters.erase (crpair);
425+ }
426+ if (m_perf_counters) {
427+ m_perf_counters->dec (l_mds_client_metrics_num_clients);
263428 }
264429
265430 auto sub_key_func = [client, rank](const MDSPerfMetricSubKeyDescriptor &d,
@@ -315,6 +480,10 @@ void MetricAggregator::handle_mds_metrics(const cref_t<MMDSMetrics> &m) {
315480 << rank << " with sequence number " << seq << dendl;
316481
317482 std::scoped_lock locker (lock);
483+ if (stopping) {
484+ dout (10 ) << " : stopping" << dendl;
485+ return ;
486+ }
318487 if (!mds_pinger.pong_received (rank, seq)) {
319488 return ;
320489 }
0 commit comments