@@ -9,7 +9,7 @@ use aptos_telemetry_service::types::telemetry::TelemetryEvent;
9
9
use once_cell:: sync:: Lazy ;
10
10
use prometheus:: {
11
11
core:: { Collector , GenericGauge } ,
12
- IntCounter , IntCounterVec ,
12
+ Histogram , IntCounter , IntCounterVec ,
13
13
} ;
14
14
use std:: collections:: BTreeMap ;
15
15
@@ -23,10 +23,10 @@ const CONSENSUS_TIMEOUT_COUNT: &str = "consensus_timeout_count";
23
23
const CONSENSUS_LAST_COMMITTED_VERSION : & str = "consensus_last_committed_version" ;
24
24
const CONSENSUS_COMMITTED_BLOCKS_COUNT : & str = "consensus_committed_blocks_count" ;
25
25
const CONSENSUS_COMMITTED_TXNS_COUNT : & str = "consensus_committed_txns_count" ;
26
- const CONSENSUS_ROUND_TIMEOUT_SECS : & str = "consensus_round_timeout_secs " ;
26
+ const CONSENSUS_ROUND_TIMEOUT_MS : & str = "consensus_round_timeout_ms " ;
27
27
const CONSENSUS_SYNC_INFO_MSG_SENT_COUNT : & str = "consensus_sync_info_msg_sent_count" ;
28
28
const CONSENSUS_CURRENT_ROUND : & str = "consensus_current_round" ;
29
- const CONSENSUS_WAIT_DURATION_S : & str = "consensus_wait_duration_s " ;
29
+ const CONSENSUS_WAIT_DURATION_MS : & str = "consensus_wait_duration_ms " ;
30
30
const MEMPOOL_CORE_MEMPOOL_INDEX_SIZE : & str = "mempool_core_mempool_index_size" ;
31
31
const REST_RESPONSE_COUNT : & str = "rest_response_count" ;
32
32
const ROLE_TYPE : & str = "role_type" ;
@@ -94,6 +94,15 @@ fn collect_consensus_metrics(core_metrics: &mut BTreeMap<String, String>) {
94
94
} )
95
95
} ;
96
96
97
+ // Helper that formats a histogram as "<sum> <count>", or returns "0" if the metric has not been initialized yet
98
+ let get_histogram_values = |metric : & ' static Lazy < Histogram > | -> String {
99
+ Lazy :: get ( metric) . map_or ( "0" . to_string ( ) , |histogram| {
100
+ let sum = histogram. get_sample_sum ( ) ;
101
+ let count = histogram. get_sample_count ( ) ;
102
+ format ! ( "{} {}" , sum, count) // Report sum and count for dashboard aggregation
103
+ } )
104
+ } ;
105
+
97
106
// Collect basic consensus metrics
98
107
core_metrics. insert (
99
108
CONSENSUS_PROPOSALS_COUNT . into ( ) ,
@@ -128,17 +137,23 @@ fn collect_consensus_metrics(core_metrics: &mut BTreeMap<String, String>) {
128
137
get_gauge_metric ( & aptos_consensus:: counters:: CURRENT_ROUND ) ,
129
138
) ;
130
139
131
- // Get the round timeout seconds from the histogram
132
- let round_timeout_ms = Lazy :: get ( & aptos_consensus :: counters :: ROUND_TIMEOUT_MS )
133
- . map_or ( 0 , |counter| counter . get ( ) ) ;
134
- let avg_round_timeout = round_timeout_ms as f64 / 1000.0 ; // Convert ms to seconds
135
- core_metrics . insert ( CONSENSUS_ROUND_TIMEOUT_SECS . into ( ) , avg_round_timeout . to_string ( ) ) ;
140
+ // Get the round timeout in milliseconds
141
+ core_metrics . insert (
142
+ CONSENSUS_ROUND_TIMEOUT_MS . into ( ) ,
143
+ get_gauge_metric ( & aptos_consensus :: counters :: ROUND_TIMEOUT_MS ) ,
144
+ ) ;
136
145
137
146
// Get sync info messages count
138
147
core_metrics. insert (
139
148
CONSENSUS_SYNC_INFO_MSG_SENT_COUNT . into ( ) ,
140
149
get_counter_metric ( & aptos_consensus:: counters:: SYNC_INFO_MSGS_SENT_COUNT ) ,
141
150
) ;
151
+
152
+ // Get wait duration histogram values (sum and count)
153
+ core_metrics. insert (
154
+ CONSENSUS_WAIT_DURATION_MS . into ( ) ,
155
+ get_histogram_values ( & aptos_consensus:: counters:: WAIT_DURATION_MS ) ,
156
+ ) ;
142
157
}
143
158
144
159
/// Collects the mempool metrics and appends it to the given map
@@ -168,15 +183,19 @@ fn collect_mempool_metrics(core_metrics: &mut BTreeMap<String, String>) {
168
183
. to_string ( ) ,
169
184
) ;
170
185
171
- // Get average transaction broadcast size
172
- let broadcast_size_sum = aptos_mempool:: counters:: SHARED_MEMPOOL_TRANSACTION_BROADCAST_SIZE
173
- . with_label_values ( & [ "success" ] )
174
- . get_sample_sum ( ) ;
175
- let broadcast_size_count = aptos_mempool:: counters:: SHARED_MEMPOOL_TRANSACTION_BROADCAST_SIZE
176
- . with_label_values ( & [ "success" ] )
177
- . get_sample_count ( ) ;
178
- let avg_broadcast_size = if broadcast_size_count > 0 {
179
- broadcast_size_sum / broadcast_size_count as f64
186
+ // Get average transaction broadcast size from HistogramVec
187
+ let broadcast_size = & aptos_mempool:: counters:: SHARED_MEMPOOL_TRANSACTION_BROADCAST_SIZE ;
188
+ let mut total_sum = 0.0 ;
189
+ let mut total_count = 0.0 ;
190
+
191
+ // Read sum and count for the "success" label only; iterating the lookup Result runs the body at most once and skips it on error
192
+ for label_values in broadcast_size. get_metric_with_label_values ( & [ "success" ] ) . iter ( ) {
193
+ total_sum += label_values. get_sample_sum ( ) ;
194
+ total_count += label_values. get_sample_count ( ) as f64 ;
195
+ }
196
+
197
+ let avg_broadcast_size = if total_count > 0.0 {
198
+ total_sum / total_count
180
199
} else {
181
200
0.0
182
201
} ;
0 commit comments