Skip to content

Commit c83bd22

Browse files
koarz and Your Name
authored and committed
[enhance](meta-service)add bvar for fdb process status (#59329)
1 parent 7f5ba43 commit c83bd22

File tree

4 files changed

+216
-138
lines changed

4 files changed

+216
-138
lines changed

cloud/src/common/bvars.cpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -209,19 +209,9 @@ bvar::Status<int64_t> g_bvar_fdb_qos_worst_data_lag_storage_server_ns("fdb_qos_w
209209
bvar::Status<int64_t> g_bvar_fdb_qos_worst_durability_lag_storage_server_ns("fdb_qos_worst_durability_lag_storage_server_ns", BVAR_FDB_INVALID_VALUE);
210210
bvar::Status<int64_t> g_bvar_fdb_qos_worst_log_server_queue_bytes("fdb_qos_worst_log_server_queue_bytes", BVAR_FDB_INVALID_VALUE);
211211
bvar::Status<int64_t> g_bvar_fdb_qos_worst_storage_server_queue_bytes("fdb_qos_worst_storage_server_queue_bytes", BVAR_FDB_INVALID_VALUE);
212-
bvar::Status<int64_t> g_bvar_fdb_workload_conflict_rate_hz("fdb_workload_conflict_rate_hz", BVAR_FDB_INVALID_VALUE);
213-
bvar::Status<int64_t> g_bvar_fdb_workload_location_rate_hz("fdb_workload_location_rate_hz", BVAR_FDB_INVALID_VALUE);
214-
bvar::Status<int64_t> g_bvar_fdb_workload_keys_read_hz("fdb_workload_keys_read_hz", BVAR_FDB_INVALID_VALUE);
215-
bvar::Status<int64_t> g_bvar_fdb_workload_read_bytes_hz("fdb_workload_read_bytes_hz", BVAR_FDB_INVALID_VALUE);
216-
bvar::Status<int64_t> g_bvar_fdb_workload_read_rate_hz("fdb_workload_read_rate_hz", BVAR_FDB_INVALID_VALUE);
217-
bvar::Status<int64_t> g_bvar_fdb_workload_write_rate_hz("fdb_workload_write_rate_hz", BVAR_FDB_INVALID_VALUE);
218-
bvar::Status<int64_t> g_bvar_fdb_workload_written_bytes_hz("fdb_workload_written_bytes_hz", BVAR_FDB_INVALID_VALUE);
219-
bvar::Status<int64_t> g_bvar_fdb_workload_transactions_started_hz("fdb_workload_transactions_started_hz", BVAR_FDB_INVALID_VALUE);
220-
bvar::Status<int64_t> g_bvar_fdb_workload_transactions_committed_hz("fdb_workload_transactions_committed_hz", BVAR_FDB_INVALID_VALUE);
221-
bvar::Status<int64_t> g_bvar_fdb_workload_transactions_rejected_hz("fdb_workload_transactions_rejected_hz", BVAR_FDB_INVALID_VALUE);
222212
bvar::Status<int64_t> g_bvar_fdb_client_thread_busyness_percent("fdb_client_thread_busyness_percent", BVAR_FDB_INVALID_VALUE);
223-
mBvarStatus<int64_t> g_bvar_fdb_process_status_int("fdb_process_status_int", {"process_id", "component", "metric"});
224-
mBvarStatus<double> g_bvar_fdb_process_status_float("fdb_process_status_float", {"process_id", "component", "metric"});
213+
mBvarStatus<double> g_bvar_fdb_cluster_processes("fdb_cluster_processes", {"process_id", "component", "metric"});
214+
mBvarStatus<double> g_bvar_fdb_cluster_workload("fdb_cluster_workload", {"component", "metric"});
225215

226216
// checker's bvars
227217
BvarStatusWithTag<int64_t> g_bvar_checker_num_scanned("checker", "num_scanned");

cloud/src/common/bvars.h

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -700,19 +700,9 @@ extern bvar::Status<int64_t> g_bvar_fdb_qos_worst_data_lag_storage_server_ns;
700700
extern bvar::Status<int64_t> g_bvar_fdb_qos_worst_durability_lag_storage_server_ns;
701701
extern bvar::Status<int64_t> g_bvar_fdb_qos_worst_log_server_queue_bytes;
702702
extern bvar::Status<int64_t> g_bvar_fdb_qos_worst_storage_server_queue_bytes;
703-
extern bvar::Status<int64_t> g_bvar_fdb_workload_conflict_rate_hz;
704-
extern bvar::Status<int64_t> g_bvar_fdb_workload_location_rate_hz;
705-
extern bvar::Status<int64_t> g_bvar_fdb_workload_keys_read_hz;
706-
extern bvar::Status<int64_t> g_bvar_fdb_workload_read_bytes_hz;
707-
extern bvar::Status<int64_t> g_bvar_fdb_workload_read_rate_hz;
708-
extern bvar::Status<int64_t> g_bvar_fdb_workload_write_rate_hz;
709-
extern bvar::Status<int64_t> g_bvar_fdb_workload_written_bytes_hz;
710-
extern bvar::Status<int64_t> g_bvar_fdb_workload_transactions_started_hz;
711-
extern bvar::Status<int64_t> g_bvar_fdb_workload_transactions_committed_hz;
712-
extern bvar::Status<int64_t> g_bvar_fdb_workload_transactions_rejected_hz;
713703
extern bvar::Status<int64_t> g_bvar_fdb_client_thread_busyness_percent;
714-
extern mBvarStatus<int64_t> g_bvar_fdb_process_status_int;
715-
extern mBvarStatus<double> g_bvar_fdb_process_status_float;
704+
extern mBvarStatus<double> g_bvar_fdb_cluster_processes;
705+
extern mBvarStatus<double> g_bvar_fdb_cluster_workload;
716706

717707
// checker
718708
extern BvarStatusWithTag<long> g_bvar_checker_num_scanned;

cloud/src/common/metric.cpp

Lines changed: 107 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -141,68 +141,6 @@ static void export_fdb_status_details(const std::string& status_str) {
141141
DCHECK(node->value.IsDouble());
142142
return static_cast<int64_t>(node->value.GetDouble() * NANOSECONDS);
143143
};
144-
auto get_process_metric = [&](std::string component) {
145-
auto node = document.FindMember("cluster");
146-
if (!node->value.HasMember("processes")) return;
147-
node = node->value.FindMember("processes");
148-
// process
149-
for (auto process_node = node->value.MemberBegin(); process_node != node->value.MemberEnd();
150-
process_node++) {
151-
const char* process_id = process_node->name.GetString();
152-
decltype(process_node) component_node;
153-
// get component iter
154-
if (!process_node->value.HasMember(component.data())) return;
155-
component_node = process_node->value.FindMember(component.data());
156-
// There are three cases here: int64, double, and object.
157-
// If it is double or int64, put it directly into the bvar.
158-
// If it is an object, recursively obtain the full name and corresponding value.
159-
// such as: {"disk": {"reads": {"counter": 123, "hz": 0}}}
160-
// component is "disk", the names of these two values should be "reads_counter" and "reads_hz"
161-
auto recursive_name_helper = [](std::string& origin_name,
162-
const char* next_level_name) -> std::string {
163-
return origin_name + '_' + next_level_name;
164-
};
165-
// proved two type lambda func to handle object and other type
166-
167-
// set_bvar_value is responsible for setting integer and float values to the corresponding bvar.
168-
auto set_bvar_value = [&process_id, &component](
169-
std::string& name,
170-
decltype(process_node)& temp_node) -> void {
171-
if (temp_node->value.IsInt64()) {
172-
g_bvar_fdb_process_status_int.put({process_id, component, name},
173-
temp_node->value.GetInt64());
174-
return;
175-
}
176-
if (temp_node->value.IsDouble()) {
177-
g_bvar_fdb_process_status_float.put({process_id, component, name},
178-
temp_node->value.GetDouble());
179-
return;
180-
}
181-
LOG(WARNING) << fmt::format(
182-
"Get process metrics set_bvar_value input a wrong type node {}", name);
183-
};
184-
auto object_recursive = [&set_bvar_value, &recursive_name_helper](
185-
auto&& self, std::string name,
186-
decltype(process_node) temp_node) -> void {
187-
// if the node is an object, then get Member(iter) and recursive with iter as arg
188-
if (temp_node->value.IsObject()) {
189-
for (auto iter = temp_node->value.MemberBegin();
190-
iter != temp_node->value.MemberEnd(); iter++) {
191-
self(self, recursive_name_helper(name, iter->name.GetString()), iter);
192-
}
193-
return;
194-
}
195-
// if not object, set bvar value
196-
set_bvar_value(name, temp_node);
197-
};
198-
// Note that the parameter passed to set_bvar_value here is the current node, not its Member
199-
// so we can directly call object_recursive in the loop
200-
for (auto metric_node = component_node->value.MemberBegin();
201-
metric_node != component_node->value.MemberEnd(); metric_node++) {
202-
object_recursive(object_recursive, metric_node->name.GetString(), metric_node);
203-
}
204-
}
205-
};
206144
// Configuration
207145
g_bvar_fdb_configuration_coordinators_count.set_value(
208146
get_value({"configuration", "coordinators_count"}));
@@ -245,26 +183,6 @@ static void export_fdb_status_details(const std::string& status_str) {
245183
get_nanoseconds({"latency_probe", "commit_seconds"}));
246184
g_bvar_fdb_latency_probe_read_ns.set_value(get_nanoseconds({"latency_probe", "read_seconds"}));
247185

248-
// Workload
249-
g_bvar_fdb_workload_conflict_rate_hz.set_value(
250-
get_value({"workload", "transactions", "conflicted", "hz"}));
251-
g_bvar_fdb_workload_location_rate_hz.set_value(
252-
get_value({"workload", "operations", "location_requests", "hz"}));
253-
g_bvar_fdb_workload_keys_read_hz.set_value(get_value({"workload", "keys", "read", "hz"}));
254-
g_bvar_fdb_workload_read_bytes_hz.set_value(get_value({"workload", "bytes", "read", "hz"}));
255-
g_bvar_fdb_workload_read_rate_hz.set_value(
256-
get_value({"workload", "operations", "reads", "hz"}));
257-
g_bvar_fdb_workload_written_bytes_hz.set_value(
258-
get_value({"workload", "bytes", "written", "hz"}));
259-
g_bvar_fdb_workload_write_rate_hz.set_value(
260-
get_value({"workload", "operations", "writes", "hz"}));
261-
g_bvar_fdb_workload_transactions_started_hz.set_value(
262-
get_value({"workload", "transactions", "started", "hz"}));
263-
g_bvar_fdb_workload_transactions_committed_hz.set_value(
264-
get_value({"workload", "transactions", "committed", "hz"}));
265-
g_bvar_fdb_workload_transactions_rejected_hz.set_value(
266-
get_value({"workload", "transactions", "rejected_for_queued_too_long", "hz"}));
267-
268186
// QOS
269187
g_bvar_fdb_qos_worst_data_lag_storage_server_ns.set_value(
270188
get_nanoseconds({"qos", "worst_data_lag_storage_server", "seconds"}));
@@ -296,10 +214,117 @@ static void export_fdb_status_details(const std::string& status_str) {
296214
}
297215
}
298216

217+
// Helper function for recursive name construction
218+
// such as: {"disk": {"reads": {"counter": 123, "hz": 0}}}
219+
// component is "disk", the names of these two values should be "reads_counter" and "reads_hz"
220+
auto recursive_name_helper = [](std::string& origin_name,
221+
const char* next_level_name) -> std::string {
222+
return origin_name + '_' + next_level_name;
223+
};
224+
225+
// Generic recursive function to traverse JSON node and set bvar values
226+
// There are three cases here: int64, double, and object.
227+
// If it is double or int64, put it directly into the bvar.
228+
// If it is an object, recursively obtain the full name and corresponding value.
229+
auto recursive_traverse_and_set = [&recursive_name_helper](auto&& set_value_callback,
230+
auto&& self, std::string name,
231+
auto temp_node) -> void {
232+
// if the node is an object, then get Member(iter) and recursive with iter as arg
233+
if (temp_node->value.IsObject()) {
234+
for (auto iter = temp_node->value.MemberBegin(); iter != temp_node->value.MemberEnd();
235+
iter++) {
236+
self(set_value_callback, self, recursive_name_helper(name, iter->name.GetString()),
237+
iter);
238+
}
239+
return;
240+
}
241+
// if not object, set bvar value
242+
set_value_callback(name, temp_node);
243+
};
244+
245+
auto get_process_metric = [&](std::string component) {
246+
auto node = document.FindMember("cluster");
247+
if (!node->value.HasMember("processes")) return;
248+
node = node->value.FindMember("processes");
249+
// process
250+
for (auto process_node = node->value.MemberBegin(); process_node != node->value.MemberEnd();
251+
process_node++) {
252+
const char* process_id = process_node->name.GetString();
253+
decltype(process_node) component_node;
254+
// get component iter
255+
if (!process_node->value.HasMember(component.data())) continue;
256+
component_node = process_node->value.FindMember(component.data());
257+
258+
// set_bvar_value is responsible for setting integer and float values to the corresponding bvar.
259+
auto set_bvar_value = [&process_id, &component](
260+
std::string& name,
261+
decltype(process_node)& temp_node) -> void {
262+
if (temp_node->value.IsInt64()) {
263+
g_bvar_fdb_cluster_processes.put(
264+
{process_id, component, name},
265+
static_cast<double>(temp_node->value.GetInt64()));
266+
return;
267+
}
268+
if (temp_node->value.IsDouble()) {
269+
g_bvar_fdb_cluster_processes.put({process_id, component, name},
270+
temp_node->value.GetDouble());
271+
return;
272+
}
273+
LOG(WARNING) << fmt::format(
274+
"Get process metrics set_bvar_value input a wrong type node {}", name);
275+
};
276+
// Note that the parameter passed to set_bvar_value here is the current node, not its Member
277+
// so we can directly call recursive_traverse_and_set in the loop
278+
for (auto metric_node = component_node->value.MemberBegin();
279+
metric_node != component_node->value.MemberEnd(); metric_node++) {
280+
recursive_traverse_and_set(set_bvar_value, recursive_traverse_and_set,
281+
metric_node->name.GetString(), metric_node);
282+
}
283+
}
284+
};
285+
286+
auto get_workload_metric = [&](std::string component) {
287+
auto node = document.FindMember("cluster");
288+
if (!node->value.HasMember("workload")) return;
289+
node = node->value.FindMember("workload");
290+
291+
if (!node->value.HasMember(component.data())) return;
292+
auto component_node = node->value.FindMember(component.data());
293+
294+
// set_bvar_value is responsible for setting integer and float values to the corresponding bvar.
295+
auto set_bvar_value = [&component](std::string& name,
296+
decltype(component_node)& temp_node) -> void {
297+
if (temp_node->value.IsInt64()) {
298+
g_bvar_fdb_cluster_workload.put({component, name},
299+
static_cast<double>(temp_node->value.GetInt64()));
300+
return;
301+
}
302+
if (temp_node->value.IsDouble()) {
303+
g_bvar_fdb_cluster_workload.put({component, name}, temp_node->value.GetDouble());
304+
return;
305+
}
306+
LOG(WARNING) << fmt::format(
307+
"Get workload metrics set_bvar_value input a wrong type node {}", name);
308+
};
309+
310+
// Reuse the common recursive_traverse_and_set function
311+
for (auto metric_node = component_node->value.MemberBegin();
312+
metric_node != component_node->value.MemberEnd(); metric_node++) {
313+
recursive_traverse_and_set(set_bvar_value, recursive_traverse_and_set,
314+
metric_node->name.GetString(), metric_node);
315+
}
316+
};
317+
299318
// Process Status
300319
get_process_metric("cpu");
301320
get_process_metric("disk");
302321
get_process_metric("memory");
322+
323+
// Workload Status
324+
get_workload_metric("keys");
325+
get_workload_metric("bytes");
326+
get_workload_metric("operations");
327+
get_workload_metric("transactions");
303328
}
304329

305330
// boundaries include the key category{meta, txn, recycle...}, instance_id and sub_category{rowset, txn_label...}

0 commit comments

Comments
 (0)