Skip to content

Commit 1892d20

Browse files
authored
Merge pull request #173 from movementlabsxyz/icarus131/prometheus-setup
Updated metrics with prometheus and grafana setup
2 parents 6fcb2de + dad2b6b commit 1892d20

File tree

20 files changed

+1068
-55
lines changed

20 files changed

+1068
-55
lines changed

Cargo.lock

Lines changed: 12 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

consensus/src/counters.rs

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ pub static CONSENSUS_LAST_TIMEOUT_VOTE_ROUND: Lazy<IntGaugeVec> = Lazy::new(|| {
515515
pub static CURRENT_ROUND: Lazy<IntGauge> = Lazy::new(|| {
516516
register_int_gauge!(
517517
"aptos_consensus_current_round",
518-
"This counter is set to the last round reported by the local round_state."
518+
"Current consensus round"
519519
)
520520
.unwrap()
521521
});
@@ -546,11 +546,14 @@ pub static TIMEOUT_COUNT: Lazy<IntCounter> = Lazy::new(|| {
546546
register_int_counter!("aptos_consensus_timeout_count", "Count the number of timeouts a node experienced since last restart (close to 0 in happy path).").unwrap()
547547
});
548548

549-
/// The timeout of the current round.
550-
pub static ROUND_TIMEOUT_MS: Lazy<IntGauge> = Lazy::new(|| {
551-
register_int_gauge!(
552-
"aptos_consensus_round_timeout_s",
553-
"The timeout of the current round."
549+
/// Round timeout in milliseconds
550+
pub static ROUND_TIMEOUT_MS: Lazy<Histogram> = Lazy::new(|| {
551+
register_histogram!(
552+
"aptos_consensus_round_timeout_ms",
553+
"Round timeout in milliseconds",
554+
vec![
555+
100.0, 200.0, 500.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, 50000.0, 100000.0
556+
]
554557
)
555558
.unwrap()
556559
});
@@ -638,8 +641,8 @@ pub static ORDER_VOTE_BROADCASTED: Lazy<IntCounter> = Lazy::new(|| {
638641
/// Counts the number of times the sync info message has been set since last restart.
639642
pub static SYNC_INFO_MSGS_SENT_COUNT: Lazy<IntCounter> = Lazy::new(|| {
640643
register_int_counter!(
641-
"aptos_consensus_sync_info_msg_sent_count",
642-
"Counts the number of times the sync info message has been set since last restart."
644+
"aptos_consensus_sync_info_msgs_sent_count",
645+
"Number of sync info messages sent"
643646
)
644647
.unwrap()
645648
});
@@ -792,6 +795,16 @@ pub static WAIT_DURATION_S: Lazy<DurationHistogram> = Lazy::new(|| {
792795
CONSENSUS_WAIT_DURATION_BUCKETS.to_vec()).unwrap())
793796
});
794797

798+
/// Wait duration in milliseconds
799+
pub static WAIT_DURATION_MS: Lazy<Histogram> = Lazy::new(|| {
800+
register_histogram!(
801+
"aptos_consensus_wait_duration_ms",
802+
"Wait duration in milliseconds",
803+
exponential_buckets(/*start=*/ 1.0, /*factor=*/ 2.0, /*count=*/ 30).unwrap(),
804+
)
805+
.unwrap()
806+
});
807+
795808
const VERIFY_BUCKETS: &[f64] = &[
796809
0.0001, 0.00025, 0.0005, 0.001, 0.0015, 0.002, 0.0025, 0.003, 0.0035, 0.004, 0.005, 0.006,
797810
0.007, 0.008, 0.009, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0,

consensus/src/round_manager.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ impl RoundManager {
333333
new_round_event: NewRoundEvent,
334334
) -> anyhow::Result<()> {
335335
counters::CURRENT_ROUND.set(new_round_event.round as i64);
336-
counters::ROUND_TIMEOUT_MS.set(new_round_event.timeout.as_millis() as i64);
336+
counters::ROUND_TIMEOUT_MS.observe(new_round_event.timeout.as_millis() as f64);
337337
match new_round_event.reason {
338338
NewRoundReason::QCReady => {
339339
counters::QC_ROUNDS_COUNT.inc();
crates/aptos-inspection-service/src/server/custom_metrics.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
use crate::server::utils::{get_all_metrics, CONTENT_TYPE_TEXT};
2+
use hyper::{Body, StatusCode};
3+
use prometheus::TextEncoder;
4+
5+
/// Handles a request for consensus metrics
6+
pub fn handle_consensus_metrics_request() -> (StatusCode, Body, String) {
7+
let all_metrics = get_all_metrics();
8+
9+
// Filter for consensus metrics
10+
let consensus_metrics: Vec<String> = all_metrics
11+
.iter()
12+
.filter_map(|(key, value)| {
13+
if key.starts_with("aptos_consensus") {
14+
Some(format!("{} {}", key, value))
15+
} else {
16+
None
17+
}
18+
})
19+
.collect();
20+
21+
(
22+
StatusCode::OK,
23+
Body::from(consensus_metrics.join("\n")),
24+
CONTENT_TYPE_TEXT.into(),
25+
)
26+
}
27+
28+
/// Handles a request for mempool metrics
29+
pub fn handle_mempool_metrics_request() -> (StatusCode, Body, String) {
30+
let all_metrics = get_all_metrics();
31+
32+
// Filter for mempool metrics
33+
let mempool_metrics: Vec<String> = all_metrics
34+
.iter()
35+
.filter_map(|(key, value)| {
36+
if key.starts_with("aptos_mempool") || key.starts_with("aptos_core_mempool") {
37+
Some(format!("{} {}", key, value))
38+
} else {
39+
None
40+
}
41+
})
42+
.collect();
43+
44+
(
45+
StatusCode::OK,
46+
Body::from(mempool_metrics.join("\n")),
47+
CONTENT_TYPE_TEXT.into(),
48+
)
49+
}
50+
51+
/// Handles a request for storage metrics
52+
pub fn handle_storage_metrics_request() -> (StatusCode, Body, String) {
53+
let all_metrics = get_all_metrics();
54+
55+
// Filter for storage metrics
56+
let storage_metrics: Vec<String> = all_metrics
57+
.iter()
58+
.filter_map(|(key, value)| {
59+
if key.starts_with("aptos_storage") || key.starts_with("aptos_schemadb") {
60+
Some(format!("{} {}", key, value))
61+
} else {
62+
None
63+
}
64+
})
65+
.collect();
66+
67+
(
68+
StatusCode::OK,
69+
Body::from(storage_metrics.join("\n")),
70+
CONTENT_TYPE_TEXT.into(),
71+
)
72+
}

crates/aptos-inspection-service/src/server/index.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use crate::{
5-
server::utils::CONTENT_TYPE_TEXT, CONFIGURATION_PATH, FORGE_METRICS_PATH, JSON_METRICS_PATH,
6-
METRICS_PATH, PEER_INFORMATION_PATH, SYSTEM_INFORMATION_PATH,
5+
server::utils::CONTENT_TYPE_TEXT, CONFIGURATION_PATH, CONSENSUS_METRICS_PATH, FORGE_METRICS_PATH,
6+
JSON_METRICS_PATH, MEMPOOL_METRICS_PATH, METRICS_PATH, PEER_INFORMATION_PATH,
7+
STORAGE_METRICS_PATH, SYSTEM_INFORMATION_PATH,
78
};
89
use hyper::{Body, StatusCode};
910

@@ -25,10 +26,13 @@ fn get_index_response() -> String {
2526
index_response.push("Welcome to the Aptos Inspection Service!".into());
2627
index_response.push("The following endpoints are available:".into());
2728
index_response.push(format!("\t- {}", CONFIGURATION_PATH));
29+
index_response.push(format!("\t- {}", CONSENSUS_METRICS_PATH));
2830
index_response.push(format!("\t- {}", FORGE_METRICS_PATH));
2931
index_response.push(format!("\t- {}", JSON_METRICS_PATH));
32+
index_response.push(format!("\t- {}", MEMPOOL_METRICS_PATH));
3033
index_response.push(format!("\t- {}", METRICS_PATH));
3134
index_response.push(format!("\t- {}", PEER_INFORMATION_PATH));
35+
index_response.push(format!("\t- {}", STORAGE_METRICS_PATH));
3236
index_response.push(format!("\t- {}", SYSTEM_INFORMATION_PATH));
3337

3438
index_response.join("\n") // Separate each entry with a newline

crates/aptos-inspection-service/src/server/mod.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use std::{
1818
};
1919

2020
mod configuration;
21+
mod custom_metrics;
2122
mod index;
2223
mod json_encoder;
2324
mod metrics;
@@ -37,6 +38,11 @@ pub const METRICS_PATH: &str = "/metrics";
3738
pub const PEER_INFORMATION_PATH: &str = "/peer_information";
3839
pub const SYSTEM_INFORMATION_PATH: &str = "/system_information";
3940

41+
// New custom metrics paths
42+
pub const CONSENSUS_METRICS_PATH: &str = "/consensus_metrics";
43+
pub const MEMPOOL_METRICS_PATH: &str = "/mempool_metrics";
44+
pub const STORAGE_METRICS_PATH: &str = "/storage_metrics";
45+
4046
// Useful string constants
4147
pub const HEADER_CONTENT_TYPE: &str = "Content-Type";
4248
pub const INVALID_ENDPOINT_MESSAGE: &str = "The requested endpoint is invalid!";
@@ -131,6 +137,21 @@ async fn serve_requests(
131137
// Exposes text encoded metrics
132138
metrics::handle_metrics_request()
133139
},
140+
CONSENSUS_METRICS_PATH => {
141+
// /consensus_metrics
142+
// Exposes consensus metrics only
143+
custom_metrics::handle_consensus_metrics_request()
144+
},
145+
MEMPOOL_METRICS_PATH => {
146+
// /mempool_metrics
147+
// Exposes mempool metrics only
148+
custom_metrics::handle_mempool_metrics_request()
149+
},
150+
STORAGE_METRICS_PATH => {
151+
// /storage_metrics
152+
// Exposes storage metrics only
153+
custom_metrics::handle_storage_metrics_request()
154+
},
134155
PEER_INFORMATION_PATH => {
135156
// /peer_information
136157
// Exposes the peer information

crates/aptos-inspection-service/src/server/tests.rs

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
use crate::{
55
server::{
66
configuration::CONFIGURATION_DISABLED_MESSAGE,
7-
peer_information::PEER_INFO_DISABLED_MESSAGE, serve_requests,
7+
peer_information::PEER_INFO_DISABLED_MESSAGE,
88
system_information::SYS_INFO_DISABLED_MESSAGE, utils::get_all_metrics,
9+
serve_requests,
910
},
10-
CONFIGURATION_PATH, FORGE_METRICS_PATH, INDEX_PATH, JSON_METRICS_PATH, METRICS_PATH,
11-
PEER_INFORMATION_PATH, SYSTEM_INFORMATION_PATH,
11+
CONFIGURATION_PATH, CONSENSUS_METRICS_PATH, FORGE_METRICS_PATH, INDEX_PATH, JSON_METRICS_PATH,
12+
MEMPOOL_METRICS_PATH, METRICS_PATH, PEER_INFORMATION_PATH, STORAGE_METRICS_PATH,
13+
SYSTEM_INFORMATION_PATH,
1214
};
1315
use aptos_config::config::{AptosDataClientConfig, BaseConfig, NodeConfig};
1416
use aptos_data_client::client::AptosDataClient;
@@ -180,6 +182,77 @@ async fn test_inspect_peer_information() {
180182
assert!(response_body_string.contains("State sync metadata"));
181183
}
182184

185+
#[tokio::test]
186+
async fn test_inspect_consensus_metrics() {
187+
// Create a validator config
188+
let config = NodeConfig::get_default_validator_config();
189+
190+
// Increment a counter and get the metrics
191+
INT_COUNTER.inc();
192+
let mut response = send_get_request_to_path(&config, CONSENSUS_METRICS_PATH).await;
193+
let response_body = body::to_bytes(response.body_mut()).await.unwrap();
194+
let response_body_string = read_to_string(response_body.as_ref()).unwrap();
195+
196+
// Verify that the response contains only consensus metrics
197+
assert_eq!(response.status(), StatusCode::OK);
198+
199+
// For test environments, response might be empty, but should still return OK
200+
if !response_body_string.is_empty() {
201+
for line in response_body_string.lines() {
202+
assert!(line.starts_with("aptos_consensus"),
203+
"Expected consensus metrics only, but found: {}", line);
204+
}
205+
}
206+
}
207+
208+
#[tokio::test]
209+
async fn test_inspect_mempool_metrics() {
210+
// Create a validator config
211+
let config = NodeConfig::get_default_validator_config();
212+
213+
// Get the mempool metrics
214+
let mut response = send_get_request_to_path(&config, MEMPOOL_METRICS_PATH).await;
215+
let response_body = body::to_bytes(response.body_mut()).await.unwrap();
216+
let response_body_string = read_to_string(response_body.as_ref()).unwrap();
217+
218+
// Verify that the response contains only mempool metrics
219+
assert_eq!(response.status(), StatusCode::OK);
220+
221+
// For test environments, response might be empty, but should still return OK
222+
if !response_body_string.is_empty() {
223+
for line in response_body_string.lines() {
224+
assert!(
225+
line.starts_with("aptos_mempool") || line.starts_with("aptos_core_mempool"),
226+
"Expected mempool metrics only, but found: {}", line
227+
);
228+
}
229+
}
230+
}
231+
232+
#[tokio::test]
233+
async fn test_inspect_storage_metrics() {
234+
// Create a validator config
235+
let config = NodeConfig::get_default_validator_config();
236+
237+
// Get the storage metrics
238+
let mut response = send_get_request_to_path(&config, STORAGE_METRICS_PATH).await;
239+
let response_body = body::to_bytes(response.body_mut()).await.unwrap();
240+
let response_body_string = read_to_string(response_body.as_ref()).unwrap();
241+
242+
// Verify that the response contains only storage metrics
243+
assert_eq!(response.status(), StatusCode::OK);
244+
245+
// For test environments, response might be empty, but should still return OK
246+
if !response_body_string.is_empty() {
247+
for line in response_body_string.lines() {
248+
assert!(
249+
line.starts_with("aptos_storage") || line.starts_with("aptos_schemadb"),
250+
"Expected storage metrics only, but found: {}", line
251+
);
252+
}
253+
}
254+
}
255+
183256
rusty_fork_test! {
184257
#[test]
185258
fn test_gather_metrics() {

0 commit comments

Comments
 (0)