Skip to content

Commit ded62df

Browse files
authored
Merge pull request ceph#61997 from Naveenaidu/wip-naveen-telemetry-show-labeled-perf-counters
telemetry: include labeled perf counters in report Reviewed-by: Ronen Friedman <[email protected]> Reviewed-by: Yaarit Hatuka <[email protected]> Reviewed-by: Afreen Misbah <[email protected]> Reviewed-by: Ilya Dryomov <[email protected]>
2 parents a77a9e9 + 859ab96 commit ded62df

File tree

19 files changed

+719
-167
lines changed

19 files changed

+719
-167
lines changed

doc/mgr/modules.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,8 +515,11 @@ function. This will result in a circular locking exception.
515515
.. automethod:: MgrModule.list_servers
516516
.. automethod:: MgrModule.get_metadata
517517
.. automethod:: MgrModule.get_daemon_status
518+
.. automethod:: MgrModule.get_unlabeled_perf_schema
519+
.. automethod:: MgrModule.get_unlabeled_counter
520+
.. automethod:: MgrModule.get_latest_unlabeled_counter
518521
.. automethod:: MgrModule.get_perf_schema
519-
.. automethod:: MgrModule.get_counter
522+
.. automethod:: MgrModule.get_latest_counter
520523
.. automethod:: MgrModule.get_mgr_id
521524
.. automethod:: MgrModule.get_daemon_health_metrics
522525

src/mgr/ActivePyModules.cc

Lines changed: 284 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <rocksdb/version.h>
2020

2121
#include "common/errno.h"
22+
#include "common/perf_counters_key.h"
2223
#include "crush/CrushWrapper.h"
2324
#include "include/stringify.h"
2425

@@ -839,7 +840,7 @@ void ActivePyModules::_refresh_config_map()
839840
}
840841
}
841842

842-
PyObject* ActivePyModules::with_perf_counters(
843+
PyObject* ActivePyModules::with_unlabled_perf_counters(
843844
std::function<void(PerfCounterInstance& counter_instance, PerfCounterType& counter_type, PyFormatter& f)> fct,
844845
const std::string &svc_name,
845846
const std::string &svc_id,
@@ -876,7 +877,77 @@ PyObject* ActivePyModules::with_perf_counters(
876877
return f.get();
877878
}
878879

879-
PyObject* ActivePyModules::get_counter_python(
880+
// Ordered list of (label name, label value) pairs attached to one perf
// counter, e.g. [(level, shallow), (pooltype, replicated)].
// The elements are non-owning views: the strings they point into must
// outlive the vector.
using perf_counter_label_pairs =
    std::vector<std::pair<std::string_view, std::string_view>>;
882+
883+
/**
 * Look up a single perf counter of one daemon and let `fct` serialise it.
 *
 * The counter is addressed by its resolved path, which has the form
 * <counter_name>.<sub_counter_name>. When labels are given they are folded
 * into the counter name, separated by NUL delimiters, e.g.
 *
 *  - labeled counter:
 *    "osd_scrub_sh_repl^@level^@shallow^@pooltype^@replicated^@.successful_scrubs_elapsed"
 *  - unlabeled counter: "osd.stat_bytes"
 *
 * @param fct              callback invoked (with the GIL held) with the
 *                         counter instance, its type and the formatter
 * @param svc_name         daemon type used to build the DaemonKey
 * @param svc_id           daemon id used to build the DaemonKey
 * @param counter_name     counter (key) name; also names the output array
 * @param sub_counter_name name of the counter below `counter_name`
 * @param labels           label pairs of the counter, empty for unlabeled
 * @return the Python object built by the formatter; the array stays empty
 *         when the daemon or the counter is unknown (logged at level 4)
 */
PyObject* ActivePyModules::with_perf_counters(
    std::function<void(
        PerfCounterInstance &counter_instance,
        PerfCounterType &counter_type,
        PyFormatter& f)> fct,
    const std::string& svc_name,
    const std::string& svc_id,
    std::string_view counter_name,
    std::string_view sub_counter_name,
    const perf_counter_label_pairs& labels) const
{
  PyFormatter f;
  Formatter::ArraySection perf_counter_value_section(f, counter_name);

  // A string_view is not guaranteed to be NUL-terminated, so never hand
  // counter_name.data() to an API that may treat it as a C string; go
  // through an owned std::string instead.
  const std::string counter_name_str(counter_name);

  // Construct the resolved path
  std::string resolved_path;
  if (labels.empty()) {
    resolved_path = counter_name_str;
  } else {
    // The key builder is given a private, mutable copy of the labels
    // (presumably because it takes non-const pointers -- confirm against
    // common/perf_counters_key.h).
    perf_counter_label_pairs perf_counter_labels = labels;
    resolved_path = ceph::perf_counters::detail::create(
        counter_name_str.c_str(), perf_counter_labels.data(),
        perf_counter_labels.data() + perf_counter_labels.size());
  }
  resolved_path += '.';
  resolved_path.append(sub_counter_name.data(), sub_counter_name.size());

  {
    without_gil_t no_gil;
    std::lock_guard l(lock);
    auto metadata = daemon_state.get(DaemonKey{svc_name, svc_id});
    if (metadata) {
      std::lock_guard l2(metadata->lock);
      auto& perf_counters = metadata->perf_counters;
      // Single lookup; the counter is used in place (both locks are held)
      // instead of being copied for every query.
      if (auto instance_it = perf_counters.instances.find(resolved_path);
          instance_it != perf_counters.instances.end()) {
        auto& counter_instance = instance_it->second;
        auto& counter_type = perf_counters.types.at(resolved_path);
        with_gil(no_gil, [&] { fct(counter_instance, counter_type, f); });
      } else {
        dout(4) << fmt::format(
                       "Missing counter: '{}' ({}.{})", resolved_path, svc_name,
                       svc_id)
                << dendl;
        dout(20) << "Paths are:" << dendl;
        for (const auto& i : perf_counters.instances) {
          dout(20) << i.first << dendl;
        }
      }
    } else {
      dout(4) << fmt::format("No daemon state for {}.{}", svc_name, svc_id)
              << dendl;
    }
  }
  return f.get();
}
949+
950+
PyObject* ActivePyModules::get_unlabeled_counter_python(
880951
const std::string &svc_name,
881952
const std::string &svc_id,
882953
const std::string &path)
@@ -905,10 +976,10 @@ PyObject* ActivePyModules::get_counter_python(
905976
}
906977
}
907978
};
908-
return with_perf_counters(extract_counters, svc_name, svc_id, path);
979+
return with_unlabled_perf_counters(extract_counters, svc_name, svc_id, path);
909980
}
910981

911-
PyObject* ActivePyModules::get_latest_counter_python(
982+
PyObject* ActivePyModules::get_latest_unlabeled_counter_python(
912983
const std::string &svc_name,
913984
const std::string &svc_id,
914985
const std::string &path)
@@ -929,10 +1000,36 @@ PyObject* ActivePyModules::get_latest_counter_python(
9291000
f.dump_unsigned("v", datapoint.v);
9301001
}
9311002
};
932-
return with_perf_counters(extract_latest_counters, svc_name, svc_id, path);
1003+
return with_unlabled_perf_counters(extract_latest_counters, svc_name, svc_id, path);
9331004
}
9341005

935-
PyObject* ActivePyModules::get_perf_schema_python(
1006+
/**
 * Fetch the newest sample of one (optionally labeled) perf counter.
 *
 * The counter is resolved by with_perf_counters(); the sample is dumped
 * as "t" plus "s" and "c" for PERFCOUNTER_LONGRUNAVG counters, and as
 * "t" plus "v" for every other counter type.
 *
 * @param svc_name         daemon type
 * @param svc_id           daemon id
 * @param counter_name     counter (key) name
 * @param sub_counter_name name of the counter below `counter_name`
 * @param labels           label pairs of the counter, empty for unlabeled
 * @return the Python object produced by with_perf_counters()
 */
PyObject* ActivePyModules::get_latest_counter_python(
    const std::string& svc_name,
    const std::string& svc_id,
    std::string_view counter_name,
    std::string_view sub_counter_name,
    const perf_counter_label_pairs& labels)
{
  auto dump_newest_sample = [](PerfCounterInstance& instance,
                               PerfCounterType& type,
                               PyFormatter& out) {
    // Plain counters carry a single value per sample.
    if (!(type.type & PERFCOUNTER_LONGRUNAVG)) {
      const auto& sample = instance.get_latest_data();
      out.dump_float("t", sample.t);
      out.dump_unsigned("v", sample.v);
      return;
    }

    // Long-running averages carry two values ("s" and "c") per sample.
    const auto& avg_sample = instance.get_latest_data_avg();
    out.dump_float("t", avg_sample.t);
    out.dump_unsigned("s", avg_sample.s);
    out.dump_unsigned("c", avg_sample.c);
  };

  return with_perf_counters(
      dump_newest_sample, svc_name, svc_id, counter_name, sub_counter_name,
      labels);
}
1031+
1032+
PyObject* ActivePyModules::get_unlabeled_perf_schema_python(
9361033
const std::string &svc_type,
9371034
const std::string &svc_id)
9381035
{
@@ -964,8 +1061,17 @@ PyObject* ActivePyModules::get_perf_schema_python(
9641061
f.open_object_section(key.c_str());
9651062
for (auto ctr_inst_iter : state->perf_counters.instances) {
9661063
const auto &counter_name = ctr_inst_iter.first;
967-
f.open_object_section(counter_name.c_str());
968-
auto type = state->perf_counters.types[counter_name];
1064+
1065+
// Ignore labeled counters. The perf schema format below cannot
1066+
// accommodate counters with labels. A new representation format is
1067+
// required to support this.
1068+
auto labels = ceph::perf_counters::key_labels(counter_name);
1069+
if (labels.begin() != labels.end()) {
1070+
continue;
1071+
}
1072+
1073+
f.open_object_section(counter_name.c_str());
1074+
auto type = state->perf_counters.types[counter_name];
9691075
f.dump_string("description", type.description);
9701076
if (!type.nick.empty()) {
9711077
f.dump_string("nick", type.nick);
@@ -985,6 +1091,176 @@ PyObject* ActivePyModules::get_perf_schema_python(
9851091
return f.get();
9861092
}
9871093

1094+
/**
 * Build the perf counter schema (labeled and unlabeled counters) of the
 * selected daemons.
 *
 * Daemon selection:
 *  - empty svc_type: all daemons
 *  - empty svc_id:   all daemons of service svc_type
 *  - otherwise:      the single daemon svc_type.svc_id, if known
 *
 * For every daemon one object is emitted, keyed by the daemon name, in the
 * following format:
 *
 *   {
 *     "osd": [
 *       {
 *         "labels": {},
 *         "counters": {
 *           "stat_byte": {
 *             "description": "",
 *             "nick": "",
 *             ...
 *           }
 *         }
 *       }
 *     ],
 *     "osd_scrub_sh_repl": [
 *       {                                  <---- 'counter object'
 *         "labels": {                      <---- 'labels' section
 *           "level": "shallow",
 *           "pooltype": "replicated"
 *         },
 *         "counters": {                    <---- 'counters' section
 *           "successful_scrubs_elapsed": { <---- 'sub counter' section
 *             "description": "",
 *             "nick": "",
 *             ...
 *           }
 *         }
 *       }
 *     ]
 *   }
 *
 * @param svc_type daemon type filter, may be empty
 * @param svc_id   daemon id filter, may be empty
 * @return the Python object built by the formatter (empty when no daemon
 *         matched; that case is logged at level 4)
 */
PyObject* ActivePyModules::get_perf_schema_python(
    const std::string& svc_type,
    const std::string& svc_id)
{
  without_gil_t no_gil;
  std::lock_guard l(lock);

  DaemonStateCollection daemons;

  if (svc_type.empty()) {
    daemons = daemon_state.get_all();
  } else if (svc_id.empty()) {
    daemons = daemon_state.get_by_service(svc_type);
  } else {
    auto key = DaemonKey{svc_type, svc_id};
    // so that the below can be a loop in all cases
    auto got = daemon_state.get(key);
    if (got != nullptr) {
      daemons[key] = got;
    }
  }

  auto f = with_gil(no_gil, [&] { return PyFormatter(); });

  // Emits one 'sub counter' object: { "<sub counter name>": { ... } }
  auto dump_sub_counter_information = [](PyFormatter *f,
                                         const PerfCounterType& type) {
    // Labels can also have "." in them, eg (notice, client.4620):
    // "mds_client_metrics-cephfs^@client^@client.4620^@rank^@0^@.avg_metadata_latency"
    // Hence search for the last occurrence of "." to get the sub counter
    // name. A path without any "." is used as a whole.
    const size_t pos = type.path.rfind('.');
    const std::string sub_counter_name =
        (pos == std::string::npos) ? type.path : type.path.substr(pos + 1);
    Formatter::ObjectSection counter_section(*f, sub_counter_name);
    f->dump_string("description", type.description);
    if (!type.nick.empty()) {
      f->dump_string("nick", type.nick);
    }
    f->dump_unsigned("type", type.type);
    f->dump_unsigned("priority", type.priority);
    f->dump_unsigned("units", type.unit);
  };

  // Opens a new 'counter object' with its 'labels' section, opens the
  // 'counters' section and emits the first sub counter into it. Both the
  // 'counters' section and the 'counter object' are left open; the caller
  // must close them.
  auto dump_counter_with_labels = [&dump_sub_counter_information](
                                      PyFormatter *f, const auto& key_labels,
                                      const auto& type) {
    f->open_object_section(""); // counter should be enclosed by array

    for (Formatter::ObjectSection labels_section{*f, "labels"};
         const auto &label : key_labels) {
      f->dump_string(label.first, label.second);
    }

    f->open_object_section("counters");
    dump_sub_counter_information(f, type);
  };

  if (!daemons.empty()) {
    for (auto &[key, state] : daemons) {
      std::lock_guard l(state->lock);
      with_gil(no_gil, [&, key = ceph::to_string(key), state = state] {
        // Key name and labels of the counter object that is currently
        // open. Both only hold views into keys of perf_counters.instances,
        // which stay valid while state->lock is held.
        std::string_view prev_key_name;
        perf_counter_label_pairs prev_key_labels;
        // True while a 'counter object' and its 'counters' section are open.
        bool counter_object_open = false;

        Formatter::ObjectSection counter_section(
            f, key.c_str()); // Main Object Section
        std::optional<Formatter::ArraySection> array_section;

        for (const auto &[counter_name_with_labels, _] :
             state->perf_counters.instances) {
          /*
            The path of the counter can either be:
            - labeled counter path: "osd_scrub_sh_repl^@level^@shallow^@pooltype^@replicated^@.successful_scrubs_elapsed"
            - unlabeled counter path: "osd.stat_bytes"

            For the above counters:
            - key_names are: 'osd_scrub_sh_repl' and 'osd'
            - counter names are: 'successful_scrubs_elapsed' and 'stat_bytes'
          */
          const auto& type =
              state->perf_counters.types[counter_name_with_labels];

          // create a vector of labels i.e [(level, shallow), (pooltype, replicated)]
          perf_counter_label_pairs key_labels;
          auto labels =
              ceph::perf_counters::key_labels(counter_name_with_labels);
          std::copy_if(
              labels.begin(), labels.end(), std::back_inserter(key_labels),
              [](const auto &label) { return !label.first.empty(); });

          // Extract the key name from the counter path; the key name forms
          // the main array section for its counters. The view must point
          // into the map key (not into a loop-local string), because it is
          // remembered in prev_key_name across iterations.
          std::string_view key_name;
          if (key_labels.empty()) {
            const std::string_view full_path{counter_name_with_labels};
            key_name = full_path.substr(0, full_path.rfind('.')); // osd
          } else {
            // osd_scrub_sh_repl
            key_name = ceph::perf_counters::key_name(counter_name_with_labels);
          }

          const bool same_key =
              counter_object_open && prev_key_name == key_name;

          if (same_key && prev_key_labels == key_labels) {
            // Same key and labels: another sub counter of the open object.
            dump_sub_counter_information(&f, type);
            continue;
          }

          if (counter_object_open) {
            f.close_section(); // close previous 'counters' section
            f.close_section(); // close previous 'counter object' section
          }
          if (!same_key) {
            // emplace() destroys the previous ArraySection first, which
            // closes the array of the previous key.
            array_section.emplace(f, key_name);
            prev_key_name = key_name;
          }
          // Remember the labels of the object being opened so that the
          // following sub counters with the same labels are grouped in it.
          prev_key_labels = key_labels;
          dump_counter_with_labels(&f, key_labels, type);
          counter_object_open = true;
        }

        // Only close what was actually opened; a daemon without any perf
        // counter never opened a 'counter object'.
        if (counter_object_open) {
          f.close_section(); // close 'counters'
          f.close_section(); // close 'counter object' section
        }
      });
    }
  } else {
    dout(4) << fmt::format(
                   "{}: No daemon state found for {}.{}", __func__, svc_type,
                   svc_id)
            << dendl;
  }
  return f.get();
}
1263+
9881264
PyObject* ActivePyModules::get_rocksdb_version()
9891265
{
9901266
std::string version = std::to_string(ROCKSDB_MAJOR) + "." +

0 commit comments

Comments
 (0)