Skip to content

Commit 210bae7

Browse files
authored
Merge pull request ceph#45370 from lxbsz/wip-54411
client: add option to disable collecting and sending metrics

Reviewed-by: Venky Shankar <[email protected]>
2 parents f6d1cb6 + e9a26c5 commit 210bae7

File tree

6 files changed: 89 additions and 36 deletions

src/client/Client.cc

Lines changed: 63 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,9 @@ Client::Client(Messenger *m, MonClient *mc, Objecter *objecter_)
378378
fuse_default_permissions = cct->_conf.get_val<bool>(
379379
"fuse_default_permissions");
380380

381+
_collect_and_send_global_metrics = cct->_conf.get_val<bool>(
382+
"client_collect_and_send_global_metrics");
383+
381384
if (cct->_conf->client_acl_type == "posix_acl")
382385
acl_type = POSIX_ACL;
383386

@@ -2337,6 +2340,7 @@ void Client::handle_client_session(const MConstRef<MClientSession>& m)
23372340
break;
23382341
}
23392342
session->mds_features = std::move(m->supported_features);
2343+
session->mds_metric_flags = std::move(m->metric_spec.metric_flags);
23402344

23412345
renew_caps(session.get());
23422346
session->state = MetaSession::STATE_OPEN;
@@ -6810,66 +6814,88 @@ void Client::collect_and_send_global_metrics() {
68106814
std::vector<ClientMetricMessage> message;
68116815

68126816
// read latency
6813-
metric = ClientMetricMessage(ReadLatencyPayload(logger->tget(l_c_read),
6814-
logger->tget(l_c_rd_avg),
6815-
logger->get(l_c_rd_sqsum),
6816-
nr_read_request));
6817-
message.push_back(metric);
6817+
if (_collect_and_send_global_metrics ||
6818+
session->mds_metric_flags.test(CLIENT_METRIC_TYPE_READ_LATENCY)) {
6819+
metric = ClientMetricMessage(ReadLatencyPayload(logger->tget(l_c_read),
6820+
logger->tget(l_c_rd_avg),
6821+
logger->get(l_c_rd_sqsum),
6822+
nr_read_request));
6823+
message.push_back(metric);
6824+
}
68186825

68196826
// write latency
6820-
metric = ClientMetricMessage(WriteLatencyPayload(logger->tget(l_c_wrlat),
6821-
logger->tget(l_c_wr_avg),
6822-
logger->get(l_c_wr_sqsum),
6823-
nr_write_request));
6824-
message.push_back(metric);
6827+
if (_collect_and_send_global_metrics ||
6828+
session->mds_metric_flags.test(CLIENT_METRIC_TYPE_WRITE_LATENCY)) {
6829+
metric = ClientMetricMessage(WriteLatencyPayload(logger->tget(l_c_wrlat),
6830+
logger->tget(l_c_wr_avg),
6831+
logger->get(l_c_wr_sqsum),
6832+
nr_write_request));
6833+
message.push_back(metric);
6834+
}
68256835

68266836
// metadata latency
6827-
metric = ClientMetricMessage(MetadataLatencyPayload(logger->tget(l_c_lat),
6828-
logger->tget(l_c_md_avg),
6829-
logger->get(l_c_md_sqsum),
6830-
nr_metadata_request));
6831-
message.push_back(metric);
6837+
if (session->mds_metric_flags.test(CLIENT_METRIC_TYPE_METADATA_LATENCY)) {
6838+
metric = ClientMetricMessage(MetadataLatencyPayload(logger->tget(l_c_lat),
6839+
logger->tget(l_c_md_avg),
6840+
logger->get(l_c_md_sqsum),
6841+
nr_metadata_request));
6842+
message.push_back(metric);
6843+
}
68326844

68336845
// cap hit ratio -- nr_caps is unused right now
6834-
auto [cap_hits, cap_misses] = get_cap_hit_rates();
6835-
metric = ClientMetricMessage(CapInfoPayload(cap_hits, cap_misses, 0));
6836-
message.push_back(metric);
6846+
if (_collect_and_send_global_metrics ||
6847+
session->mds_metric_flags.test(CLIENT_METRIC_TYPE_CAP_INFO)) {
6848+
auto [cap_hits, cap_misses] = get_cap_hit_rates();
6849+
metric = ClientMetricMessage(CapInfoPayload(cap_hits, cap_misses, 0));
6850+
message.push_back(metric);
6851+
}
68376852

68386853
// dentry lease hit ratio
6839-
auto [dlease_hits, dlease_misses, nr] = get_dlease_hit_rates();
6840-
metric = ClientMetricMessage(DentryLeasePayload(dlease_hits, dlease_misses, nr));
6841-
message.push_back(metric);
6854+
if (_collect_and_send_global_metrics ||
6855+
session->mds_metric_flags.test(CLIENT_METRIC_TYPE_DENTRY_LEASE)) {
6856+
auto [dlease_hits, dlease_misses, nr] = get_dlease_hit_rates();
6857+
metric = ClientMetricMessage(DentryLeasePayload(dlease_hits, dlease_misses, nr));
6858+
message.push_back(metric);
6859+
}
68426860

68436861
// opened files
6844-
{
6862+
if (session->mds_metric_flags.test(CLIENT_METRIC_TYPE_OPENED_FILES)) {
68456863
auto [opened_files, total_inodes] = get_opened_files_rates();
68466864
metric = ClientMetricMessage(OpenedFilesPayload(opened_files, total_inodes));
6865+
message.push_back(metric);
68476866
}
6848-
message.push_back(metric);
68496867

68506868
// pinned i_caps
6851-
{
6869+
if (_collect_and_send_global_metrics ||
6870+
session->mds_metric_flags.test(CLIENT_METRIC_TYPE_PINNED_ICAPS)) {
68526871
auto [pinned_icaps, total_inodes] = get_pinned_icaps_rates();
68536872
metric = ClientMetricMessage(PinnedIcapsPayload(pinned_icaps, total_inodes));
6873+
message.push_back(metric);
68546874
}
6855-
message.push_back(metric);
68566875

68576876
// opened inodes
6858-
{
6877+
if (_collect_and_send_global_metrics ||
6878+
session->mds_metric_flags.test(CLIENT_METRIC_TYPE_OPENED_INODES)) {
68596879
auto [opened_inodes, total_inodes] = get_opened_inodes_rates();
68606880
metric = ClientMetricMessage(OpenedInodesPayload(opened_inodes, total_inodes));
6881+
message.push_back(metric);
68616882
}
6862-
message.push_back(metric);
68636883

68646884
// read io sizes
6865-
metric = ClientMetricMessage(ReadIoSizesPayload(total_read_ops,
6866-
total_read_size));
6867-
message.push_back(metric);
6885+
if (_collect_and_send_global_metrics ||
6886+
session->mds_metric_flags.test(CLIENT_METRIC_TYPE_READ_IO_SIZES)) {
6887+
metric = ClientMetricMessage(ReadIoSizesPayload(total_read_ops,
6888+
total_read_size));
6889+
message.push_back(metric);
6890+
}
68686891

68696892
// write io sizes
6870-
metric = ClientMetricMessage(WriteIoSizesPayload(total_write_ops,
6871-
total_write_size));
6872-
message.push_back(metric);
6893+
if (_collect_and_send_global_metrics ||
6894+
session->mds_metric_flags.test(CLIENT_METRIC_TYPE_WRITE_IO_SIZES)) {
6895+
metric = ClientMetricMessage(WriteIoSizesPayload(total_write_ops,
6896+
total_write_size));
6897+
message.push_back(metric);
6898+
}
68736899

68746900
session->con->send_message2(make_message<MClientMetrics>(std::move(message)));
68756901
}
@@ -15945,6 +15971,10 @@ void Client::handle_conf_change(const ConfigProxy& conf,
1594515971
if (changed.count("client_oc_max_dirty_age")) {
1594615972
objectcacher->set_max_dirty_age(cct->_conf->client_oc_max_dirty_age);
1594715973
}
15974+
if (changed.count("client_collect_and_send_global_metrics")) {
15975+
_collect_and_send_global_metrics = cct->_conf.get_val<bool>(
15976+
"client_collect_and_send_global_metrics");
15977+
}
1594815978
}
1594915979

1595015980
void intrusive_ptr_add_ref(Inode *in)

src/client/Client.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,7 @@ class Client : public Dispatcher, public md_config_obs_t {
897897
std::unique_ptr<MDSMap> mdsmap;
898898

899899
bool fuse_default_permissions;
900+
bool _collect_and_send_global_metrics;
900901

901902
protected:
902903
/* Flags for check_caps() */

src/client/MetaSession.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ struct MetaSession {
2525
uint64_t cap_renew_seq = 0;
2626
entity_addrvec_t addrs;
2727
feature_bitset_t mds_features;
28+
feature_bitset_t mds_metric_flags;
2829

2930
enum {
3031
STATE_NEW, // Unused

src/common/options/mds-client.yaml.in

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,3 +546,16 @@ options:
546546
min: 0
547547
flags:
548548
- runtime
549+
- name: client_collect_and_send_global_metrics
550+
type: bool
551+
level: advanced
552+
desc: to enable and force collecting and sending the global metrics to MDS
553+
long_desc: Use with care. When connecting to some old Ceph clusters, enabling
554+
this option may crash the MDS daemons while they are being upgraded.
555+
default: false
556+
tags:
557+
- client
558+
services:
559+
- mds_client
560+
flags:
561+
- runtime

src/mds/Server.cc

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ Server::Server(MDSRank *m, MetricsHandler *metrics_handler) :
268268
dir_max_entries = g_conf().get_val<uint64_t>("mds_dir_max_entries");
269269
bal_fragment_size_max = g_conf().get_val<int64_t>("mds_bal_fragment_size_max");
270270
supported_features = feature_bitset_t(CEPHFS_FEATURES_MDS_SUPPORTED);
271+
supported_metric_spec = feature_bitset_t(CEPHFS_METRIC_FEATURES_ALL);
271272
}
272273

273274
void Server::dispatch(const cref_t<Message> &m)
@@ -875,8 +876,10 @@ void Server::_session_logged(Session *session, uint64_t state_seq, bool open, ve
875876
metrics_handler->add_session(session);
876877
ceph_assert(session->get_connection());
877878
auto reply = make_message<MClientSession>(CEPH_SESSION_OPEN);
878-
if (session->info.has_feature(CEPHFS_FEATURE_MIMIC))
879+
if (session->info.has_feature(CEPHFS_FEATURE_MIMIC)) {
879880
reply->supported_features = supported_features;
881+
reply->metric_spec = supported_metric_spec;
882+
}
880883
mds->send_message_client(reply, session);
881884
if (mdcache->is_readonly()) {
882885
auto m = make_message<MClientSession>(CEPH_SESSION_FORCE_RO);
@@ -1029,8 +1032,10 @@ void Server::finish_force_open_sessions(const map<client_t,pair<Session*,uint64_
10291032
metrics_handler->add_session(session);
10301033

10311034
auto reply = make_message<MClientSession>(CEPH_SESSION_OPEN);
1032-
if (session->info.has_feature(CEPHFS_FEATURE_MIMIC))
1035+
if (session->info.has_feature(CEPHFS_FEATURE_MIMIC)) {
10331036
reply->supported_features = supported_features;
1037+
reply->metric_spec = supported_metric_spec;
1038+
}
10341039
mds->send_message_client(reply, session);
10351040

10361041
if (mdcache->is_readonly())
@@ -1500,8 +1505,10 @@ void Server::handle_client_reconnect(const cref_t<MClientReconnect> &m)
15001505
metrics_handler->add_session(session);
15011506
// notify client of success with an OPEN
15021507
auto reply = make_message<MClientSession>(CEPH_SESSION_OPEN);
1503-
if (session->info.has_feature(CEPHFS_FEATURE_MIMIC))
1508+
if (session->info.has_feature(CEPHFS_FEATURE_MIMIC)) {
15041509
reply->supported_features = supported_features;
1510+
reply->metric_spec = supported_metric_spec;
1511+
}
15051512
mds->send_message_client(reply, session);
15061513
mds->clog->debug() << "reconnect by " << session->info.inst << " after " << delay;
15071514
}

src/mds/Server.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,7 @@ class Server {
487487
std::set<client_t> client_reconnect_denied; // clients whose reconnect msg have been denied .
488488

489489
feature_bitset_t supported_features;
490+
feature_bitset_t supported_metric_spec;
490491
feature_bitset_t required_client_features;
491492

492493
bool forward_all_requests_to_auth = false;

0 commit comments

Comments
 (0)