Skip to content

Commit 9c8f5bd

Browse files
authored
Merge pull request ceph#55773 from weirdwiz/main
exporter: handle exceptions gracefully Reviewed-by: Avan Thakkar <[email protected]> Reviewed-by: Juan Miguel Olmo <[email protected]> Reviewed-by: Pere Diaz Bou <[email protected]> Reviewed-by: Kefu Chai <[email protected]>
2 parents e789e13 + e442ee4 commit 9c8f5bd

File tree

2 files changed

+90
-62
lines changed

2 files changed

+90
-62
lines changed

src/exporter/DaemonMetricCollector.cc

Lines changed: 87 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,66 @@ std::string boost_string_to_std(boost::json::string js) {
8484

8585
std::string quote(std::string value) { return "\"" + value + "\""; }
8686

87+
void DaemonMetricCollector::parse_asok_metrics(
88+
std::string &counter_dump_response, std::string &counter_schema_response,
89+
int64_t prio_limit, const std::string &daemon_name) {
90+
json_object counter_dump =
91+
boost::json::parse(counter_dump_response).as_object();
92+
json_object counter_schema =
93+
boost::json::parse(counter_schema_response).as_object();
94+
95+
for (auto &perf_group_item : counter_schema) {
96+
std::string perf_group = {perf_group_item.key().begin(),
97+
perf_group_item.key().end()};
98+
json_array perf_group_schema_array = perf_group_item.value().as_array();
99+
json_array perf_group_dump_array = counter_dump[perf_group].as_array();
100+
for (auto schema_itr = perf_group_schema_array.begin(),
101+
dump_itr = perf_group_dump_array.begin();
102+
schema_itr != perf_group_schema_array.end() &&
103+
dump_itr != perf_group_dump_array.end();
104+
++schema_itr, ++dump_itr) {
105+
auto counters = schema_itr->at("counters").as_object();
106+
auto counters_labels = schema_itr->at("labels").as_object();
107+
auto counters_values = dump_itr->at("counters").as_object();
108+
labels_t labels;
109+
110+
for (auto &label : counters_labels) {
111+
std::string label_key = {label.key().begin(), label.key().end()};
112+
labels[label_key] = quote(label.value().as_string().c_str());
113+
}
114+
for (auto &counter : counters) {
115+
json_object counter_group = counter.value().as_object();
116+
if (counter_group["priority"].as_int64() < prio_limit) {
117+
continue;
118+
}
119+
std::string counter_name_init = {counter.key().begin(),
120+
counter.key().end()};
121+
std::string counter_name = perf_group + "_" + counter_name_init;
122+
promethize(counter_name);
123+
124+
auto extra_labels = get_extra_labels(daemon_name);
125+
if (extra_labels.empty()) {
126+
dout(1) << "Unable to parse instance_id from daemon_name: "
127+
<< daemon_name << dendl;
128+
continue;
129+
}
130+
labels.insert(extra_labels.begin(), extra_labels.end());
131+
132+
// For now this is only required for rgw multi-site metrics
133+
auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
134+
if (!multisite_labels_and_name.first.empty()) {
135+
labels.insert(multisite_labels_and_name.first.begin(),
136+
multisite_labels_and_name.first.end());
137+
counter_name = multisite_labels_and_name.second;
138+
}
139+
auto perf_values = counters_values.at(counter_name_init);
140+
dump_asok_metric(counter_group, perf_values, counter_name, labels);
141+
}
142+
}
143+
}
144+
}
145+
146+
87147
void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter_prio,
88148
bool sockClientsPing, std::string &dump_response,
89149
std::string &schema_response,
@@ -125,71 +185,36 @@ void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter
125185
continue;
126186
}
127187

128-
json_object counter_dump = boost::json::parse(counter_dump_response).as_object();
129-
json_object counter_schema = boost::json::parse(counter_schema_response).as_object();
130-
131-
for (auto &perf_group_item : counter_schema) {
132-
std::string perf_group = {perf_group_item.key().begin(),
133-
perf_group_item.key().end()};
134-
json_array perf_group_schema_array = perf_group_item.value().as_array();
135-
json_array perf_group_dump_array = counter_dump[perf_group].as_array();
136-
for (auto schema_itr = perf_group_schema_array.begin(),
137-
dump_itr = perf_group_dump_array.begin();
138-
schema_itr != perf_group_schema_array.end() &&
139-
dump_itr != perf_group_dump_array.end();
140-
++schema_itr, ++dump_itr) {
141-
auto counters = schema_itr->at("counters").as_object();
142-
auto counters_labels = schema_itr->at("labels").as_object();
143-
auto counters_values = dump_itr->at("counters").as_object();
144-
labels_t labels;
145-
146-
for (auto &label: counters_labels) {
147-
std::string label_key = {label.key().begin(), label.key().end()};
148-
labels[label_key] = quote(label.value().as_string().c_str());
149-
}
150-
for (auto &counter : counters) {
151-
json_object counter_group = counter.value().as_object();
152-
if (counter_group["priority"].as_int64() < prio_limit) {
153-
continue;
154-
}
155-
std::string counter_name_init = {counter.key().begin(), counter.key().end()};
156-
std::string counter_name = perf_group + "_" + counter_name_init;
157-
promethize(counter_name);
158-
159-
auto extra_labels = get_extra_labels(daemon_name);
160-
if (extra_labels.empty()) {
161-
dout(1) << "Unable to parse instance_id from daemon_name: " << daemon_name << dendl;
162-
continue;
163-
}
164-
labels.insert(extra_labels.begin(), extra_labels.end());
165-
166-
// For now this is only required for rgw multi-site metrics
167-
auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
168-
if (!multisite_labels_and_name.first.empty()) {
169-
labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end());
170-
counter_name = multisite_labels_and_name.second;
171-
}
172-
auto perf_values = counters_values.at(counter_name_init);
173-
dump_asok_metric(counter_group, perf_values, counter_name, labels);
174-
}
175-
}
176-
}
177-
std::string config_show = !config_show_response ? "" :
188+
try {
189+
std::string config_show = !config_show_response ? "" :
178190
asok_request(sock_client, "config show", daemon_name);
179-
if (config_show.size() == 0) {
191+
if (config_show.size() == 0) {
192+
failures++;
193+
continue;
194+
}
195+
json_object pid_file_json = boost::json::parse(config_show).as_object();
196+
std::string pid_path =
197+
boost_string_to_std(pid_file_json["pid_file"].as_string());
198+
std::string pid_str = read_file_to_string(pid_path);
199+
if (!pid_path.size()) {
200+
dout(1) << "pid path is empty; process metrics won't be fetched for: "
201+
<< daemon_name << dendl;
202+
}
203+
if (!pid_str.empty()) {
204+
daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
205+
}
206+
parse_asok_metrics(counter_dump_response, counter_schema_response,
207+
prio_limit, daemon_name);
208+
} catch (const std::invalid_argument &e) {
180209
failures++;
210+
dout(1) << "failed to handle " << daemon_name << ": " << e.what()
211+
<< dendl;
212+
continue;
213+
} catch (const std::runtime_error &e) {
214+
failures++;
215+
dout(1) << "failed to parse json for " << daemon_name << ": " << e.what()
216+
<< dendl;
181217
continue;
182-
}
183-
json_object pid_file_json = boost::json::parse(config_show).as_object();
184-
std::string pid_path =
185-
boost_string_to_std(pid_file_json["pid_file"].as_string());
186-
std::string pid_str = read_file_to_string(pid_path);
187-
if (!pid_path.size()) {
188-
dout(1) << "pid path is empty; process metrics won't be fetched for: "
189-
<< daemon_name << dendl;
190-
}
191-
if (!pid_str.empty()) {
192-
daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
193218
}
194219
}
195220
dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/"

src/exporter/DaemonMetricCollector.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ class DaemonMetricCollector {
5252
void dump_asok_metric(boost::json::object perf_info,
5353
boost::json::value perf_values, std::string name,
5454
labels_t labels);
55+
void parse_asok_metrics(std::string &counter_dump_response,
56+
std::string &counter_schema_response,
57+
int64_t prio_limit, const std::string &daemon_name);
5558
void get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids);
5659
std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name);
5760
};

0 commit comments

Comments
 (0)