From f20da924d1e37e4500511fd255f3f0c7bc8d720c Mon Sep 17 00:00:00 2001 From: Jayant Krishnamurthy Date: Fri, 10 Jan 2025 13:00:01 -0800 Subject: [PATCH 1/2] better success and failure metrics --- apps/fortuna/src/keeper.rs | 89 +++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 29 deletions(-) diff --git a/apps/fortuna/src/keeper.rs b/apps/fortuna/src/keeper.rs index 38da57e2df..bc3292022f 100644 --- a/apps/fortuna/src/keeper.rs +++ b/apps/fortuna/src/keeper.rs @@ -71,6 +71,8 @@ pub struct KeeperMetrics { pub total_gas_spent: Family>, pub requests: Family, pub requests_processed: Family, + pub requests_processed_success: Family, + pub requests_processed_failure: Family, pub requests_reprocessed: Family, pub reveals: Family, pub request_duration_ms: Family, @@ -87,6 +89,8 @@ impl Default for KeeperMetrics { total_gas_spent: Family::default(), requests: Family::default(), requests_processed: Family::default(), + requests_processed_success: Family::default(), + requests_processed_failure: Family::default(), requests_reprocessed: Family::default(), reveals: Family::default(), request_duration_ms: Family::new_with_constructor(|| { @@ -131,6 +135,18 @@ impl KeeperMetrics { keeper_metrics.requests_processed.clone(), ); + writable_registry.register( + "requests_processed_success", + "Number of requests processed successfully", + keeper_metrics.requests_processed_success.clone(), + ); + + writable_registry.register( + "requests_processed_failure", + "Number of requests processed with failure", + keeper_metrics.requests_processed_failure.clone(), + ); + writable_registry.register( "reveal", "Number of reveals", @@ -375,20 +391,19 @@ pub async fn process_event_with_backoff( metrics: Arc, ) { let start_time = std::time::Instant::now(); + let account_label = AccountLabel { + chain_id: chain_state.id.clone(), + address: chain_state.provider_address.to_string(), + }; - metrics - .requests - .get_or_create(&AccountLabel { - chain_id: chain_state.id.clone(), - address: chain_state.provider_address.to_string(), - }) - .inc(); + metrics.requests.get_or_create(&account_label).inc(); tracing::info!("Started processing event"); let backoff = ExponentialBackoff { max_elapsed_time: Some(Duration::from_secs(300)), // retry for 5 minutes ..Default::default() }; - match backoff::future::retry_notify( + + let success = backoff::future::retry_notify( backoff, || async { process_event(&event, &chain_state, &contract, gas_limit, metrics.clone()).await @@ -397,32 +412,48 @@ pub async fn process_event_with_backoff( tracing::error!("Error happened at {:?}: {}", dur, e); }, ) - .await - { - Ok(()) => { - tracing::info!("Processed event",); - } - Err(e) => { - tracing::error!("Failed to process event: {:?}", e); - } - } + .await; - let duration_ms = start_time.elapsed().as_millis() as f64; - metrics - .request_duration_ms - .get_or_create(&AccountLabel { - chain_id: chain_state.id.clone(), - address: chain_state.provider_address.to_string(), - }) - .observe(duration_ms); + let duration = start_time.elapsed(); metrics .requests_processed - .get_or_create(&AccountLabel { - chain_id: chain_state.id.clone(), - address: chain_state.provider_address.to_string(), - }) + .get_or_create(&account_label) .inc(); + + match success { + Ok(()) => { + tracing::info!("Processed event successfully in {:?}", duration); + + metrics + .requests_processed_success + .get_or_create(&account_label) + .inc(); + + metrics + .request_duration_ms + .get_or_create(&account_label) + .observe(duration.as_millis() as f64); + } + Err(e) => { + // In case the callback did not succeed, we double-check that the request is still on-chain. + // If the request is no longer on-chain, one of the transactions we sent likely succeeded, but + // the RPC gave us an error anyway. + let req = chain_state + .contract + .get_request(event.provider_address, event.sequence_number) + .await; + tracing::error!("Failed to process event: {:?}. Request: {:?}", e, req); + + // We only count failures for cases where we are completely certain that the callback failed. + if req.is_ok_and(|x| x.is_some()) { + metrics + .requests_processed_failure + .get_or_create(&account_label) + .inc(); + } + } + } } const TX_CONFIRMATION_TIMEOUT_SECS: u64 = 30; From 0e777c5913184083e874c0ac20209763c5f620f1 Mon Sep 17 00:00:00 2001 From: Jayant Krishnamurthy Date: Fri, 10 Jan 2025 13:01:13 -0800 Subject: [PATCH 2/2] comment --- apps/fortuna/src/keeper.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/fortuna/src/keeper.rs b/apps/fortuna/src/keeper.rs index bc3292022f..12b35efadd 100644 --- a/apps/fortuna/src/keeper.rs +++ b/apps/fortuna/src/keeper.rs @@ -185,7 +185,7 @@ impl KeeperMetrics { writable_registry.register( "request_duration_ms", - "Time taken to process each callback request in milliseconds", + "Time taken to process each successful callback request in milliseconds", keeper_metrics.request_duration_ms.clone(), );