Skip to content

Commit 51a8990

Browse files
committed
exporter: use only counter dump/schema commands for extracting counters
Fixes: https://tracker.ceph.com/issues/59191 Signed-off-by: Avan Thakkar <[email protected]> Ceph exporter no longer requires the output of perf dump/schema, as the ``counter dump`` command returns both labeled and unlabeled perf counters, which the exporter can fetch and export. Removed the ``exporter_get_labeled_counters`` config option, as the exporter will now export all counters, labeled or unlabeled. The fix also adds support for renaming the rgw multi-site metric names and adding labels to them, similar to what is done in the prometheus module.
1 parent cec15a5 commit 51a8990

File tree

3 files changed

+90
-104
lines changed

3 files changed

+90
-104
lines changed

src/common/options/ceph-exporter.yaml.in

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,3 @@ options:
5252
- ceph-exporter
5353
flags:
5454
- runtime
55-
- name: exporter_get_labeled_counters
56-
type: bool
57-
level: advanced
58-
desc: If true will fetch and export labeled performance counters
59-
long_desc: Ceph perf counters now support labels to provide fine-grained
60-
stats using ``counter dump`` command and exporter can fetch these counters
61-
and add the labels in Prometheus format.
62-
default: true
63-
services:
64-
- ceph-exporter
65-
flags:
66-
- runtime

src/exporter/DaemonMetricCollector.cc

Lines changed: 89 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -96,25 +96,67 @@ void DaemonMetricCollector::dump_asok_metrics() {
9696
builder =
9797
std::unique_ptr<UnorderedMetricsBuilder>(new UnorderedMetricsBuilder());
9898
}
99+
auto prio_limit = g_conf().get_val<int64_t>("exporter_prio_limit");
99100
for (auto &[daemon_name, sock_client] : clients) {
100101
bool ok;
101102
sock_client.ping(&ok);
102103
if (!ok) {
103104
failures++;
104105
continue;
105106
}
106-
std::string perf_dump_response =
107-
asok_request(sock_client, "perf dump", daemon_name);
108-
if (perf_dump_response.size() == 0) {
109-
failures++;
110-
continue;
107+
std::string counter_dump_response =
108+
asok_request(sock_client, "counter dump", daemon_name);
109+
if (counter_dump_response.size() == 0) {
110+
failures++;
111+
continue;
111112
}
112-
std::string perf_schema_response =
113-
asok_request(sock_client, "perf schema", daemon_name);
114-
if (perf_schema_response.size() == 0) {
113+
std::string counter_schema_response =
114+
asok_request(sock_client, "counter schema", daemon_name);
115+
if (counter_schema_response.size() == 0) {
115116
failures++;
116117
continue;
117118
}
119+
120+
json_object counter_dump = boost::json::parse(counter_dump_response).as_object();
121+
json_object counter_schema = boost::json::parse(counter_schema_response).as_object();
122+
123+
for (auto &labeled_perf : counter_schema) {
124+
std::string labeled_perf_group = {labeled_perf.key().begin(), labeled_perf.key().end()};
125+
json_object labeled_perf_group_object = labeled_perf.value().as_object();
126+
auto counters = labeled_perf_group_object["counters"].as_object();
127+
auto counters_labels = labeled_perf_group_object["labels"].as_object();
128+
auto labeled_perf_group_counters = counter_dump[labeled_perf_group].as_object()["counters"].as_object();
129+
labels_t labels;
130+
131+
for(auto &label: counters_labels) {
132+
std::string label_key = {label.key().begin(), label.key().end()};
133+
labels[label_key] = quote(label.value().as_string().c_str());
134+
}
135+
for (auto &counter : counters) {
136+
json_object counter_group = counter.value().as_object();
137+
if (counter_group["priority"].as_int64() < prio_limit) {
138+
continue;
139+
}
140+
std::string counter_name_init = {counter.key().begin(), counter.key().end()};
141+
std::string counter_name = "ceph_" + labeled_perf_group + "_" + counter_name_init;
142+
std::replace_if(counter_name.begin(), counter_name.end(), is_hyphen, '_');
143+
144+
if (counters_labels.empty()) {
145+
auto labels_and_name = get_labels_and_metric_name(daemon_name, counter_name);
146+
labels = labels_and_name.first;
147+
counter_name = labels_and_name.second;
148+
}
149+
// For now this is only required for rgw multi-site metrics
150+
auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
151+
if (!multisite_labels_and_name.first.empty()) {
152+
labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end());
153+
counter_name = multisite_labels_and_name.second;
154+
}
155+
labels.insert({"ceph_daemon", quote(daemon_name)});
156+
auto perf_values = labeled_perf_group_counters.at(counter_name_init);
157+
dump_asok_metric(counter_group, perf_values, counter_name, labels);
158+
}
159+
}
118160
std::string config_show =
119161
asok_request(sock_client, "config show", daemon_name);
120162
if (config_show.size() == 0) {
@@ -132,76 +174,6 @@ void DaemonMetricCollector::dump_asok_metrics() {
132174
if (!pid_str.empty()) {
133175
daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
134176
}
135-
json_object dump = boost::json::parse(perf_dump_response).as_object();
136-
json_object schema = boost::json::parse(perf_schema_response).as_object();
137-
auto prio_limit = g_conf().get_val<int64_t>("exporter_prio_limit");
138-
for (auto &perf : schema) {
139-
std::string perf_group = {perf.key().begin(), perf.key().end()};
140-
json_object perf_group_object = perf.value().as_object();
141-
for (auto &perf_counter : perf_group_object) {
142-
std::string perf_name = {perf_counter.key().begin(),
143-
perf_counter.key().end()};
144-
json_object perf_info = perf_counter.value().as_object();
145-
if (perf_info["priority"].as_int64() < prio_limit) {
146-
continue;
147-
}
148-
std::string name = "ceph_" + perf_group + "_" + perf_name;
149-
std::replace_if(name.begin(), name.end(), is_hyphen, '_');
150-
151-
auto labels_and_name = get_labels_and_metric_name(daemon_name, name);
152-
labels_t labels = labels_and_name.first;
153-
name = labels_and_name.second;
154-
155-
json_value perf_values = dump[perf_group].as_object()[perf_name];
156-
dump_asok_metric(perf_info, perf_values, name, labels);
157-
}
158-
}
159-
// fetch labeled perf counters if config is set to true
160-
bool labeledperf = g_conf().get_val<bool>("exporter_get_labeled_counters");
161-
if (labeledperf) {
162-
std::string counter_dump_response =
163-
asok_request(sock_client, "counter dump", daemon_name);
164-
if (counter_dump_response.size() == 0) {
165-
failures++;
166-
continue;
167-
}
168-
std::string counter_schema_response =
169-
asok_request(sock_client, "counter schema", daemon_name);
170-
if (counter_schema_response.size() == 0) {
171-
failures++;
172-
continue;
173-
}
174-
175-
json_object counter_dump = boost::json::parse(counter_dump_response).as_object();
176-
json_object counter_schema = boost::json::parse(counter_schema_response).as_object();
177-
178-
for (auto &labeled_perf : counter_schema) {
179-
std::string labeled_perf_group = {labeled_perf.key().begin(), labeled_perf.key().end()};
180-
json_object labeled_perf_group_object = labeled_perf.value().as_object();
181-
auto counters = labeled_perf_group_object["counters"].as_object();
182-
auto counters_labels = labeled_perf_group_object["labels"].as_object();
183-
auto labeled_perf_group_counters = counter_dump[labeled_perf_group].as_object()["counters"].as_object();
184-
labels_t labels;
185-
186-
for(auto &label: counters_labels) {
187-
std::string label_key = {label.key().begin(), label.key().end()};
188-
labels[label_key] = quote(label.value().as_string().c_str());
189-
}
190-
labels["ceph_daemon"] = quote(daemon_name);
191-
for (auto &counter : counters) {
192-
json_object counter_group = counter.value().as_object();
193-
if (counter_group["priority"].as_int64() < prio_limit) {
194-
continue;
195-
}
196-
std::string counter_name_init = {counter.key().begin(), counter.key().end()};
197-
std::string counter_name = "ceph_" + labeled_perf_group + "_" + counter_name_init;
198-
std::replace_if(counter_name.begin(), counter_name.end(), is_hyphen, '_');
199-
200-
auto perf_values = labeled_perf_group_counters.at(counter_name_init);
201-
dump_asok_metric(counter_group, perf_values, counter_name, labels);
202-
}
203-
}
204-
}
205177
}
206178
dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/"
207179
<< clients.size() << " daemons." << dendl;
@@ -313,28 +285,53 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name,
313285
std::string new_metric_name;
314286
labels_t labels;
315287
new_metric_name = metric_name;
316-
if (daemon_name.find("rgw") != std::string::npos) {
288+
// In vstart cluster socket files for rgw are stored as radosgw.<instance_id>.asok
289+
if (daemon_name.find("radosgw") != std::string::npos) {
290+
std::size_t pos = daemon_name.find_last_of('.');
291+
std::string tmp = daemon_name.substr(pos+1);
292+
labels["instance_id"] = quote(tmp);
293+
}
294+
else if (daemon_name.find("rgw") != std::string::npos) {
317295
std::string tmp = daemon_name.substr(16, std::string::npos);
318296
std::string::size_type pos = tmp.find('.');
319297
labels["instance_id"] = quote("rgw." + tmp.substr(0, pos));
320-
} else {
321-
labels["ceph_daemon"] = quote(daemon_name);
322-
if (daemon_name.find("rbd-mirror") != std::string::npos) {
323-
std::regex re(
324-
"^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
325-
")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
326-
std::smatch match;
327-
if (std::regex_search(daemon_name, match, re) == true) {
328-
new_metric_name = "ceph_rbd_mirror_image_" + match.str(4);
329-
labels["pool"] = quote(match.str(1));
330-
labels["namespace"] = quote(match.str(2));
331-
labels["image"] = quote(match.str(3));
332-
}
298+
}
299+
else if (daemon_name.find("rbd-mirror") != std::string::npos) {
300+
std::regex re(
301+
"^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
302+
")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
303+
std::smatch match;
304+
if (std::regex_search(daemon_name, match, re) == true) {
305+
new_metric_name = "ceph_rbd_mirror_image_" + match.str(4);
306+
labels["pool"] = quote(match.str(1));
307+
labels["namespace"] = quote(match.str(2));
308+
labels["image"] = quote(match.str(3));
333309
}
334310
}
335311
return {labels, new_metric_name};
336312
}
337313

314+
// Add fixed name metrics from existing ones that have details in their names
315+
// that should be in labels (not in name). For backward compatibility,
316+
// a new fixed name metric is created (instead of replacing) and details are put
317+
// in new labels. Intended for RGW sync perf. counters but extendable as required.
318+
// See: https://tracker.ceph.com/issues/45311
319+
std::pair<labels_t, std::string>
320+
DaemonMetricCollector::add_fixed_name_metrics(std::string metric_name) {
321+
std::string new_metric_name;
322+
labels_t labels;
323+
new_metric_name = metric_name;
324+
325+
std::regex re("^data_sync_from_(.*)\\.");
326+
std::smatch match;
327+
if (std::regex_search(metric_name, match, re) == true) {
328+
new_metric_name = std::regex_replace(metric_name, re, "from_([^.]*)', 'from_zone");
329+
labels["source_zone"] = quote(match.str(1));
330+
return {labels, new_metric_name};
331+
}
332+
return {};
333+
}
334+
338335
/*
339336
perf_values can be either a int/double or a json_object. Since
340337
json_value is a wrapper of both we use that class.

src/exporter/DaemonMetricCollector.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class DaemonMetricCollector {
4949
labels_t labels);
5050
std::pair<labels_t, std::string>
5151
get_labels_and_metric_name(std::string daemon_name, std::string metric_name);
52+
std::pair<labels_t, std::string> add_fixed_name_metrics(std::string metric_name);
5253
void get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids);
5354
std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name);
5455
};

0 commit comments

Comments
 (0)