Skip to content

Commit 5a0fe8f

Browse files
committed
fix: address comments
1 parent 30ab21e commit 5a0fe8f

File tree

2 files changed

+52
-23
lines changed

2 files changed

+52
-23
lines changed

consensus/src/counters.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ pub static CONSENSUS_LAST_TIMEOUT_VOTE_ROUND: Lazy<IntGaugeVec> = Lazy::new(|| {
515515
pub static CURRENT_ROUND: Lazy<IntGauge> = Lazy::new(|| {
516516
register_int_gauge!(
517517
"aptos_consensus_current_round",
518-
"This counter is set to the last round reported by the local round_state."
518+
"Current consensus round"
519519
)
520520
.unwrap()
521521
});
@@ -546,11 +546,11 @@ pub static TIMEOUT_COUNT: Lazy<IntCounter> = Lazy::new(|| {
546546
register_int_counter!("aptos_consensus_timeout_count", "Count the number of timeouts a node experienced since last restart (close to 0 in happy path).").unwrap()
547547
});
548548

549-
/// The timeout of the current round.
549+
/// Round timeout in milliseconds
550550
pub static ROUND_TIMEOUT_MS: Lazy<IntGauge> = Lazy::new(|| {
551551
register_int_gauge!(
552-
"aptos_consensus_round_timeout_s",
553-
"The timeout of the current round."
552+
"aptos_consensus_round_timeout_ms",
553+
"Round timeout in milliseconds"
554554
)
555555
.unwrap()
556556
});
@@ -638,8 +638,8 @@ pub static ORDER_VOTE_BROADCASTED: Lazy<IntCounter> = Lazy::new(|| {
638638
/// Counts the number of times the sync info message has been set since last restart.
639639
pub static SYNC_INFO_MSGS_SENT_COUNT: Lazy<IntCounter> = Lazy::new(|| {
640640
register_int_counter!(
641-
"aptos_consensus_sync_info_msg_sent_count",
642-
"Counts the number of times the sync info message has been set since last restart."
641+
"aptos_consensus_sync_info_msgs_sent_count",
642+
"Number of sync info messages sent"
643643
)
644644
.unwrap()
645645
});
@@ -792,6 +792,16 @@ pub static WAIT_DURATION_S: Lazy<DurationHistogram> = Lazy::new(|| {
792792
CONSENSUS_WAIT_DURATION_BUCKETS.to_vec()).unwrap())
793793
});
794794

795+
/// Wait duration in milliseconds
796+
pub static WAIT_DURATION_MS: Lazy<Histogram> = Lazy::new(|| {
797+
register_histogram!(
798+
"aptos_consensus_wait_duration_ms",
799+
"Wait duration in milliseconds",
800+
exponential_buckets(/*start=*/ 1.0, /*factor=*/ 2.0, /*count=*/ 30).unwrap(),
801+
)
802+
.unwrap()
803+
});
804+
795805
const VERIFY_BUCKETS: &[f64] = &[
796806
0.0001, 0.00025, 0.0005, 0.001, 0.0015, 0.002, 0.0025, 0.003, 0.0035, 0.004, 0.005, 0.006,
797807
0.007, 0.008, 0.009, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0,

crates/aptos-telemetry/src/core_metrics.rs

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use aptos_telemetry_service::types::telemetry::TelemetryEvent;
99
use once_cell::sync::Lazy;
1010
use prometheus::{
1111
core::{Collector, GenericGauge},
12-
IntCounter, IntCounterVec,
12+
Histogram, IntCounter, IntCounterVec,
1313
};
1414
use std::collections::BTreeMap;
1515

@@ -23,10 +23,10 @@ const CONSENSUS_TIMEOUT_COUNT: &str = "consensus_timeout_count";
2323
const CONSENSUS_LAST_COMMITTED_VERSION: &str = "consensus_last_committed_version";
2424
const CONSENSUS_COMMITTED_BLOCKS_COUNT: &str = "consensus_committed_blocks_count";
2525
const CONSENSUS_COMMITTED_TXNS_COUNT: &str = "consensus_committed_txns_count";
26-
const CONSENSUS_ROUND_TIMEOUT_SECS: &str = "consensus_round_timeout_secs";
26+
const CONSENSUS_ROUND_TIMEOUT_MS: &str = "consensus_round_timeout_ms";
2727
const CONSENSUS_SYNC_INFO_MSG_SENT_COUNT: &str = "consensus_sync_info_msg_sent_count";
2828
const CONSENSUS_CURRENT_ROUND: &str = "consensus_current_round";
29-
const CONSENSUS_WAIT_DURATION_S: &str = "consensus_wait_duration_s";
29+
const CONSENSUS_WAIT_DURATION_MS: &str = "consensus_wait_duration_ms";
3030
const MEMPOOL_CORE_MEMPOOL_INDEX_SIZE: &str = "mempool_core_mempool_index_size";
3131
const REST_RESPONSE_COUNT: &str = "rest_response_count";
3232
const ROLE_TYPE: &str = "role_type";
@@ -94,6 +94,15 @@ fn collect_consensus_metrics(core_metrics: &mut BTreeMap<String, String>) {
9494
})
9595
};
9696

97+
// Helper function to safely get histogram values
98+
let get_histogram_values = |metric: &'static Lazy<Histogram>| -> String {
99+
Lazy::get(metric).map_or("0".to_string(), |histogram| {
100+
let sum = histogram.get_sample_sum();
101+
let count = histogram.get_sample_count();
102+
format!("{} {}", sum, count) // Report sum and count for dashboard aggregation
103+
})
104+
};
105+
97106
// Collect basic consensus metrics
98107
core_metrics.insert(
99108
CONSENSUS_PROPOSALS_COUNT.into(),
@@ -128,17 +137,23 @@ fn collect_consensus_metrics(core_metrics: &mut BTreeMap<String, String>) {
128137
get_gauge_metric(&aptos_consensus::counters::CURRENT_ROUND),
129138
);
130139

131-
// Get the round timeout seconds from the histogram
132-
let round_timeout_ms = Lazy::get(&aptos_consensus::counters::ROUND_TIMEOUT_MS)
133-
.map_or(0, |counter| counter.get());
134-
let avg_round_timeout = round_timeout_ms as f64 / 1000.0; // Convert ms to seconds
135-
core_metrics.insert(CONSENSUS_ROUND_TIMEOUT_SECS.into(), avg_round_timeout.to_string());
140+
// Get the round timeout in milliseconds
141+
core_metrics.insert(
142+
CONSENSUS_ROUND_TIMEOUT_MS.into(),
143+
get_gauge_metric(&aptos_consensus::counters::ROUND_TIMEOUT_MS),
144+
);
136145

137146
// Get sync info messages count
138147
core_metrics.insert(
139148
CONSENSUS_SYNC_INFO_MSG_SENT_COUNT.into(),
140149
get_counter_metric(&aptos_consensus::counters::SYNC_INFO_MSGS_SENT_COUNT),
141150
);
151+
152+
// Get wait duration histogram values (sum and count)
153+
core_metrics.insert(
154+
CONSENSUS_WAIT_DURATION_MS.into(),
155+
get_histogram_values(&aptos_consensus::counters::WAIT_DURATION_MS),
156+
);
142157
}
143158

144159
/// Collects the mempool metrics and appends it to the given map
@@ -168,15 +183,19 @@ fn collect_mempool_metrics(core_metrics: &mut BTreeMap<String, String>) {
168183
.to_string(),
169184
);
170185

171-
// Get average transaction broadcast size
172-
let broadcast_size_sum = aptos_mempool::counters::SHARED_MEMPOOL_TRANSACTION_BROADCAST_SIZE
173-
.with_label_values(&["success"])
174-
.get_sample_sum();
175-
let broadcast_size_count = aptos_mempool::counters::SHARED_MEMPOOL_TRANSACTION_BROADCAST_SIZE
176-
.with_label_values(&["success"])
177-
.get_sample_count();
178-
let avg_broadcast_size = if broadcast_size_count > 0 {
179-
broadcast_size_sum / broadcast_size_count as f64
186+
// Get average transaction broadcast size from HistogramVec
187+
let broadcast_size = &aptos_mempool::counters::SHARED_MEMPOOL_TRANSACTION_BROADCAST_SIZE;
188+
let mut total_sum = 0.0;
189+
let mut total_count = 0.0;
190+
191+
// Sum up values across all label combinations
192+
for label_values in broadcast_size.get_metric_with_label_values(&["success"]).iter() {
193+
total_sum += label_values.get_sample_sum();
194+
total_count += label_values.get_sample_count() as f64;
195+
}
196+
197+
let avg_broadcast_size = if total_count > 0.0 {
198+
total_sum / total_count
180199
} else {
181200
0.0
182201
};

0 commit comments

Comments
 (0)