Skip to content

Commit ee153ca

Browse files
authored
Merge pull request ceph#64335 from anmolbabu/ fixes 2345460
Handle failures in metric parsing
2 parents 6db0369 + f29e3f3 commit ee153ca

File tree

1 file changed

+124
-76
lines changed

1 file changed

+124
-76
lines changed

src/exporter/DaemonMetricCollector.cc

Lines changed: 124 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -102,62 +102,137 @@ std::string quote(std::string value) { return "\"" + value + "\""; }
102102
void DaemonMetricCollector::parse_asok_metrics(
103103
std::string &counter_dump_response, std::string &counter_schema_response,
104104
int64_t prio_limit, const std::string &daemon_name) {
105-
json_object counter_dump =
106-
boost::json::parse(counter_dump_response).as_object();
107-
json_object counter_schema =
108-
boost::json::parse(counter_schema_response).as_object();
109-
110-
for (auto &perf_group_item : counter_schema) {
111-
std::string perf_group = {perf_group_item.key().begin(),
112-
perf_group_item.key().end()};
113-
json_array perf_group_schema_array = perf_group_item.value().as_array();
114-
json_array perf_group_dump_array = counter_dump[perf_group].as_array();
115-
for (auto schema_itr = perf_group_schema_array.begin(),
116-
dump_itr = perf_group_dump_array.begin();
117-
schema_itr != perf_group_schema_array.end() &&
118-
dump_itr != perf_group_dump_array.end();
119-
++schema_itr, ++dump_itr) {
120-
auto counters = schema_itr->at("counters").as_object();
121-
auto counters_labels = schema_itr->at("labels").as_object();
122-
auto counters_values = dump_itr->at("counters").as_object();
123-
labels_t labels;
124-
125-
for (auto &label : counters_labels) {
126-
std::string label_key = {label.key().begin(), label.key().end()};
127-
labels[label_key] = quote(label.value().as_string().c_str());
128-
}
129-
for (auto &counter : counters) {
130-
json_object counter_group = counter.value().as_object();
131-
if (counter_group["priority"].as_int64() < prio_limit) {
132-
continue;
133-
}
134-
std::string counter_name_init = {counter.key().begin(),
135-
counter.key().end()};
136-
std::string counter_name = perf_group + "_" + counter_name_init;
137-
promethize(counter_name);
138-
139-
auto extra_labels = get_extra_labels(daemon_name);
140-
if (extra_labels.empty()) {
141-
dout(1) << "Unable to parse instance_id from daemon_name: "
142-
<< daemon_name << dendl;
105+
try {
106+
json_object counter_dump =
107+
boost::json::parse(counter_dump_response).as_object();
108+
json_object counter_schema =
109+
boost::json::parse(counter_schema_response).as_object();
110+
111+
for (auto &perf_group_item : counter_schema) {
112+
std::string perf_group = {perf_group_item.key().begin(),
113+
perf_group_item.key().end()};
114+
json_array perf_group_schema_array = perf_group_item.value().as_array();
115+
json_array perf_group_dump_array = counter_dump[perf_group].as_array();
116+
for (auto schema_itr = perf_group_schema_array.begin(),
117+
dump_itr = perf_group_dump_array.begin();
118+
schema_itr != perf_group_schema_array.end() &&
119+
dump_itr != perf_group_dump_array.end();
120+
++schema_itr, ++dump_itr) {
121+
try {
122+
auto counters = schema_itr->at("counters").as_object();
123+
auto counters_labels = schema_itr->at("labels").as_object();
124+
auto counters_values = dump_itr->at("counters").as_object();
125+
labels_t labels;
126+
127+
for (auto &label : counters_labels) {
128+
std::string label_key = {label.key().begin(), label.key().end()};
129+
labels[label_key] = quote(label.value().as_string().c_str());
130+
}
131+
for (auto &counter : counters) {
132+
try {
133+
json_object counter_group = counter.value().as_object();
134+
if (counter_group["priority"].as_int64() < prio_limit) {
135+
continue;
136+
}
137+
std::string counter_name_init = {counter.key().begin(),
138+
counter.key().end()};
139+
std::string counter_name = perf_group + "_" + counter_name_init;
140+
promethize(counter_name);
141+
142+
auto extra_labels = get_extra_labels(daemon_name);
143+
if (extra_labels.empty()) {
144+
dout(1) << "Unable to parse instance_id from daemon_name: "
145+
<< daemon_name << dendl;
146+
continue;
147+
}
148+
labels.insert(extra_labels.begin(), extra_labels.end());
149+
150+
// For now this is only required for rgw multi-site metrics
151+
auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
152+
if (!multisite_labels_and_name.first.empty()) {
153+
labels.insert(multisite_labels_and_name.first.begin(),
154+
multisite_labels_and_name.first.end());
155+
counter_name = multisite_labels_and_name.second;
156+
}
157+
auto perf_values = counters_values.at(counter_name_init);
158+
dump_asok_metric(counter_group, perf_values, counter_name, labels);
159+
} catch (const std::exception &e) {
160+
dout(1) << "Exception in counter processing for " << daemon_name << ": " << e.what() << dendl;
161+
continue;
162+
}
163+
}
164+
} catch (const std::exception &e) {
165+
dout(1) << "Exception in schema/dump iteration for " << daemon_name << ": " << e.what() << dendl;
143166
continue;
144167
}
145-
labels.insert(extra_labels.begin(), extra_labels.end());
146-
147-
// For now this is only required for rgw multi-site metrics
148-
auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
149-
if (!multisite_labels_and_name.first.empty()) {
150-
labels.insert(multisite_labels_and_name.first.begin(),
151-
multisite_labels_and_name.first.end());
152-
counter_name = multisite_labels_and_name.second;
153-
}
154-
auto perf_values = counters_values.at(counter_name_init);
155-
dump_asok_metric(counter_group, perf_values, counter_name, labels);
156168
}
157169
}
170+
} catch (const std::exception &e) {
171+
dout(1) << "Exception in parse_asok_metrics for " << daemon_name << ": " << e.what() << dendl;
172+
return;
158173
}
159174
}
160175

176+
/*
177+
perf_values can be either an int/double or a json_object. Since
177+
json_value is a wrapper of both, we use that class.
179+
*/
180+
void DaemonMetricCollector::dump_asok_metric(json_object perf_info,
181+
json_value perf_values,
182+
std::string name,
183+
labels_t labels) {
184+
try {
185+
if (!perf_info.if_contains("type") ||
186+
!perf_info.if_contains("metric_type") ||
187+
!perf_info.if_contains("description")) {
188+
dout(1) << "Missing required key in perf_info for metric: " << name << dendl;
189+
return;
190+
}
191+
int64_t type = perf_info["type"].as_int64();
192+
193+
if (!perf_info["metric_type"].is_string()) {
194+
dout(1) << "Missing or invalid 'metric_type' in perf_info for metric: " << name << dendl;
195+
return;
196+
}
197+
std::string metric_type =
198+
boost_string_to_std(perf_info["metric_type"].as_string());
199+
200+
if (!perf_info["description"].is_string()) {
201+
dout(1) << "Missing or invalid 'description' in perf_info for metric: " << name << dendl;
202+
return;
203+
}
204+
std::string description =
205+
boost_string_to_std(perf_info["description"].as_string());
206+
207+
if (type & PERFCOUNTER_LONGRUNAVG) {
208+
if (!perf_values.is_object()) {
209+
dout(1) << "perf_values is not an object for metric: " << name << dendl;
210+
return;
211+
}
212+
auto perf_obj = perf_values.as_object();
213+
if (!perf_obj.if_contains("avgcount")) {
214+
dout(1) << "Missing 'avgcount' in perf_values for metric: " << name << dendl;
215+
return;
216+
}
217+
if (!perf_obj.if_contains("sum")) {
218+
dout(1) << "Missing 'sum' in perf_values for metric: " << name << dendl;
219+
return;
220+
}
221+
int64_t count = perf_obj["avgcount"].as_int64();
222+
add_metric(builder, count, name + "_count", description + " Count", "counter",
223+
labels);
224+
json_value sum_value = perf_obj["sum"];
225+
add_double_or_int_metric(builder, sum_value, name + "_sum", description + " Total",
226+
metric_type, labels);
227+
} else {
228+
add_double_or_int_metric(builder, perf_values, name, description,
229+
metric_type, labels);
230+
}
231+
} catch (const std::exception& e) {
232+
dout(1) << "Exception in dump_asok_metric for metric: " << name << ": " << e.what() << dendl;
233+
return;
234+
}
235+
}
161236

162237
void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter_prio,
163238
bool sockClientsPing, std::string &dump_response,
@@ -400,33 +475,6 @@ DaemonMetricCollector::add_fixed_name_metrics(std::string metric_name) {
400475
return {};
401476
}
402477

403-
/*
404-
perf_values can be either an int/double or a json_object. Since
405-
json_value is a wrapper of both, we use that class.
406-
*/
407-
void DaemonMetricCollector::dump_asok_metric(json_object perf_info,
408-
json_value perf_values,
409-
std::string name,
410-
labels_t labels) {
411-
int64_t type = perf_info["type"].as_int64();
412-
std::string metric_type =
413-
boost_string_to_std(perf_info["metric_type"].as_string());
414-
std::string description =
415-
boost_string_to_std(perf_info["description"].as_string());
416-
417-
if (type & PERFCOUNTER_LONGRUNAVG) {
418-
int64_t count = perf_values.as_object()["avgcount"].as_int64();
419-
add_metric(builder, count, name + "_count", description + " Count", "counter",
420-
labels);
421-
json_value sum_value = perf_values.as_object()["sum"];
422-
add_double_or_int_metric(builder, sum_value, name + "_sum", description + " Total",
423-
metric_type, labels);
424-
} else {
425-
add_double_or_int_metric(builder, perf_values, name, description,
426-
metric_type, labels);
427-
}
428-
}
429-
430478
void DaemonMetricCollector::update_sockets() {
431479
std::string sock_dir = g_conf().get_val<std::string>("exporter_sock_dir");
432480
clients.clear();

0 commit comments

Comments
 (0)