From 12c09446c986ce5a477c7417b5036f2e2aea2f91 Mon Sep 17 00:00:00 2001 From: Connor Prussin Date: Thu, 5 Jun 2025 20:23:08 -0700 Subject: [PATCH] feat(fortuna): add prometheus metrics for history query latency --- apps/fortuna/Cargo.lock | 4 +- apps/fortuna/Cargo.toml | 2 +- apps/fortuna/src/api.rs | 5 + apps/fortuna/src/api/explorer.rs | 100 +++++++++++++++++++- apps/fortuna/src/config.rs | 11 ++- apps/fortuna/src/eth_utils/traced_client.rs | 9 +- apps/fortuna/src/history.rs | 16 ++-- apps/fortuna/src/keeper/keeper_metrics.rs | 19 ++-- 8 files changed, 131 insertions(+), 35 deletions(-) diff --git a/apps/fortuna/Cargo.lock b/apps/fortuna/Cargo.lock index f676901674..4588b93f6b 100644 --- a/apps/fortuna/Cargo.lock +++ b/apps/fortuna/Cargo.lock @@ -3108,9 +3108,9 @@ dependencies = [ [[package]] name = "prometheus-client" -version = "0.21.2" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c99afa9a01501019ac3a14d71d9f94050346f55ca471ce90c799a15c58f61e2" +checksum = "cf41c1a7c32ed72abe5082fb19505b969095c12da9f5732a4bc9878757fd087c" dependencies = [ "dtoa", "itoa", diff --git a/apps/fortuna/Cargo.toml b/apps/fortuna/Cargo.toml index 19852b1094..e0ff4f758d 100644 --- a/apps/fortuna/Cargo.toml +++ b/apps/fortuna/Cargo.toml @@ -19,7 +19,7 @@ ethabi = "18.0.0" ethers = { version = "2.0.14", features = ["ws"] } futures = { version = "0.3.28" } hex = "0.4.3" -prometheus-client = { version = "0.21.2" } +prometheus-client = { version = "0.23.1" } pythnet-sdk = { path = "../../pythnet/pythnet_sdk", features = ["strum"] } rand = "0.8.5" reqwest = { version = "0.11.22", features = ["json", "blocking"] } diff --git a/apps/fortuna/src/api.rs b/apps/fortuna/src/api.rs index 6ee7fd5d92..fd7446d359 100644 --- a/apps/fortuna/src/api.rs +++ b/apps/fortuna/src/api.rs @@ -61,6 +61,8 @@ pub struct ApiState { /// Prometheus metrics pub metrics: Arc, + + pub explorer_metrics: Arc, } impl ApiState { @@ -73,6 +75,8 @@ impl ApiState { http_requests: Family::default(), }; + let explorer_metrics = Arc::new(ExplorerMetrics::new(metrics_registry.clone()).await); + let http_requests = metrics.http_requests.clone(); metrics_registry.write().await.register( "http_requests", @@ -83,6 +87,7 @@ impl ApiState { ApiState { chains, metrics: Arc::new(metrics), + explorer_metrics, history, metrics_registry, } diff --git a/apps/fortuna/src/api/explorer.rs b/apps/fortuna/src/api/explorer.rs index d682dc4ead..48bc59d7a2 100644 --- a/apps/fortuna/src/api/explorer.rs +++ b/apps/fortuna/src/api/explorer.rs @@ -1,17 +1,89 @@ use { crate::{ api::{ApiBlockChainState, NetworkId, RestError, StateTag}, - history::RequestStatus, + config::LATENCY_BUCKETS, + history::{RequestQueryBuilder, RequestStatus, SearchField}, }, axum::{ extract::{Query, State}, Json, }, chrono::{DateTime, Utc}, + prometheus_client::{ + encoding::{EncodeLabelSet, EncodeLabelValue}, + metrics::{family::Family, histogram::Histogram}, + registry::Registry, + }, + std::sync::Arc, + tokio::{sync::RwLock, time::Instant}, utoipa::IntoParams, }; -#[derive(Debug, serde::Serialize, serde::Deserialize, IntoParams)] +#[derive(Debug)] +pub struct ExplorerMetrics { + results_latency: Family, + count_latency: Family, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, EncodeLabelSet)] +pub struct QueryTags { + search_type: Option, + has_network_id_filter: bool, + has_state_filter: bool, +} + +impl<'a> From> for QueryTags { + fn from(builder: RequestQueryBuilder<'a>) -> Self { + QueryTags { + search_type: builder.search.map(|val| match val { + SearchField::TxHash(_) => SearchType::TxHash, + SearchField::Sender(_) => SearchType::Sender, + SearchField::SequenceNumber(_) => SearchType::SequenceNumber, + }), + has_network_id_filter: builder.network_id.is_some(), + has_state_filter: builder.state.is_some(), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, EncodeLabelValue)] +enum SearchType { + TxHash, + Sender, + SequenceNumber, +} + +impl ExplorerMetrics { + pub async fn new(metrics_registry: Arc>) -> Self { + let mut guard = metrics_registry.write().await; + let sub_registry = guard.sub_registry_with_prefix("explorer"); + + let results_latency = Family::::new_with_constructor(|| { + Histogram::new(LATENCY_BUCKETS.into_iter()) + }); + sub_registry.register( + "results_latency", + "The latency of requests to the database to collect the limited results.", + results_latency.clone(), + ); + + let count_latency = Family::::new_with_constructor(|| { + Histogram::new(LATENCY_BUCKETS.into_iter()) + }); + sub_registry.register( + "count_latency", + "The latency of requests to the database to collect the total matching result count.", + count_latency.clone(), + ); + + Self { + results_latency, + count_latency, + } + } +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, IntoParams)] #[into_params(parameter_in=Query)] pub struct ExplorerQueryParams { /// Only return logs that are newer or equal to this timestamp. Timestamp is in ISO 8601 format with UTC timezone. @@ -96,7 +168,13 @@ pub async fn explorer( query = query.max_timestamp(max_timestamp); } - let (requests, total_results) = tokio::join!(query.execute(), query.count_results()); + let results_latency = &state.explorer_metrics.results_latency; + let count_latency = &state.explorer_metrics.count_latency; + let query_tags = &query.clone().into(); + let (requests, total_results) = tokio::join!( + measure_latency(results_latency, query_tags, query.execute()), + measure_latency(count_latency, query_tags, query.count_results()) + ); let requests = requests.map_err(|_| RestError::TemporarilyUnavailable)?; let total_results = total_results.map_err(|_| RestError::TemporarilyUnavailable)?; @@ -105,3 +183,19 @@ pub async fn explorer( total_results, })) } + +async fn measure_latency( + metric: &Family, + query_tags: &QueryTags, + function: F, +) -> T +where + F: std::future::Future, +{ + let start = Instant::now(); + let return_value = function.await; + metric + .get_or_create(query_tags) + .observe(start.elapsed().as_secs_f64()); + return_value +} diff --git a/apps/fortuna/src/config.rs b/apps/fortuna/src/config.rs index 806b845005..1d96a69c61 100644 --- a/apps/fortuna/src/config.rs +++ b/apps/fortuna/src/config.rs @@ -11,8 +11,9 @@ use { }; pub use { generate::GenerateOptions, get_request::GetRequestOptions, inspect::InspectOptions, - register_provider::RegisterProviderOptions, request_randomness::RequestRandomnessOptions, - run::RunOptions, setup_provider::SetupProviderOptions, withdraw_fees::WithdrawFeesOptions, + prometheus_client::metrics::histogram::Histogram, register_provider::RegisterProviderOptions, + request_randomness::RequestRandomnessOptions, run::RunOptions, + setup_provider::SetupProviderOptions, withdraw_fees::WithdrawFeesOptions, }; mod generate; @@ -367,3 +368,9 @@ impl SecretString { Ok(None) } } + +/// This is a histogram with a bucket configuration appropriate for most things +/// which measure latency to external services. +pub const LATENCY_BUCKETS: [f64; 11] = [ + 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, +]; diff --git a/apps/fortuna/src/eth_utils/traced_client.rs b/apps/fortuna/src/eth_utils/traced_client.rs index 184c972eca..ee66ce2697 100644 --- a/apps/fortuna/src/eth_utils/traced_client.rs +++ b/apps/fortuna/src/eth_utils/traced_client.rs @@ -1,5 +1,5 @@ use { - crate::api::ChainId, + crate::{api::ChainId, config::LATENCY_BUCKETS}, anyhow::Result, axum::async_trait, ethers::{ @@ -42,12 +42,7 @@ impl RpcMetrics { ); let latency = Family::::new_with_constructor(|| { - Histogram::new( - [ - 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, - ] - .into_iter(), - ) + Histogram::new(LATENCY_BUCKETS.into_iter()) }); sub_registry.register( "latency", diff --git a/apps/fortuna/src/history.rs b/apps/fortuna/src/history.rs index 4add2a648c..03c5251fe8 100644 --- a/apps/fortuna/src/history.rs +++ b/apps/fortuna/src/history.rs @@ -345,13 +345,13 @@ impl History { #[derive(Debug, Clone)] pub struct RequestQueryBuilder<'a> { pool: &'a Pool, - search: Option, - network_id: Option, - state: Option, - limit: i64, - offset: i64, - min_timestamp: DateTime, - max_timestamp: DateTime, + pub search: Option, + pub network_id: Option, + pub state: Option, + pub limit: i64, + pub offset: i64, + pub min_timestamp: DateTime, + pub max_timestamp: DateTime, } impl<'a> RequestQueryBuilder<'a> { @@ -503,7 +503,7 @@ pub enum RequestQueryBuilderError { } #[derive(Debug, Clone)] -enum SearchField { +pub enum SearchField { TxHash(TxHash), Sender(Address), SequenceNumber(i64), diff --git a/apps/fortuna/src/keeper/keeper_metrics.rs b/apps/fortuna/src/keeper/keeper_metrics.rs index f1c37f0cf7..bc0f69c1e0 100644 --- a/apps/fortuna/src/keeper/keeper_metrics.rs +++ b/apps/fortuna/src/keeper/keeper_metrics.rs @@ -69,24 +69,19 @@ impl Default for KeeperMetrics { requests_reprocessed: Family::default(), reveals: Family::default(), request_duration_ms: Family::new_with_constructor(|| { - Histogram::new( - vec![ - 1000.0, 2500.0, 5000.0, 7500.0, 10000.0, 20000.0, 30000.0, 40000.0, - 50000.0, 60000.0, 120000.0, 180000.0, 240000.0, 300000.0, 600000.0, - ] - .into_iter(), - ) + Histogram::new(vec![ + 1000.0, 2500.0, 5000.0, 7500.0, 10000.0, 20000.0, 30000.0, 40000.0, 50000.0, + 60000.0, 120000.0, 180000.0, 240000.0, 300000.0, 600000.0, + ]) }), retry_count: Family::new_with_constructor(|| { - Histogram::new(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 15.0, 20.0].into_iter()) + Histogram::new(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 15.0, 20.0]) }), final_gas_multiplier: Family::new_with_constructor(|| { - Histogram::new( - vec![100.0, 125.0, 150.0, 200.0, 300.0, 400.0, 500.0, 600.0].into_iter(), - ) + Histogram::new(vec![100.0, 125.0, 150.0, 200.0, 300.0, 400.0, 500.0, 600.0]) }), final_fee_multiplier: Family::new_with_constructor(|| { - Histogram::new(vec![100.0, 110.0, 120.0, 140.0, 160.0, 180.0, 200.0].into_iter()) + Histogram::new(vec![100.0, 110.0, 120.0, 140.0, 160.0, 180.0, 200.0]) }), gas_price_estimate: Family::default(), highest_revealed_sequence_number: Family::default(),