From 78231b83f5a9cc70ac0586f57e00912f206d1ff1 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Oct 2025 21:12:31 +0000 Subject: [PATCH 1/2] Add request_failovers_triggered metric to Fortuna This metric tracks when backup replicas perform failover to fulfill requests that the primary replica failed to handle. It will be used for alerting when primary instances are not working correctly. Co-Authored-By: Tejas Badadare --- apps/fortuna/src/keeper/keeper_metrics.rs | 11 +++++++++++ apps/fortuna/src/keeper/process_event.rs | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/apps/fortuna/src/keeper/keeper_metrics.rs b/apps/fortuna/src/keeper/keeper_metrics.rs index 3fe4423976..d2d00cc5d2 100644 --- a/apps/fortuna/src/keeper/keeper_metrics.rs +++ b/apps/fortuna/src/keeper/keeper_metrics.rs @@ -35,6 +35,7 @@ pub struct KeeperMetrics { pub requests_processed_success: Family, pub requests_processed_failure: Family, pub requests_reprocessed: Family, + pub request_failovers_triggered: Family, pub reveals: Family, pub request_duration_ms: Family, pub retry_count: Family, @@ -66,6 +67,7 @@ impl Default for KeeperMetrics { requests_processed_success: Family::default(), requests_processed_failure: Family::default(), requests_reprocessed: Family::default(), + request_failovers_triggered: Family::default(), reveals: Family::default(), request_duration_ms: Family::new_with_constructor(|| { Histogram::new(vec![ @@ -186,6 +188,12 @@ impl KeeperMetrics { keeper_metrics.requests_reprocessed.clone(), ); + writable_registry.register( + "request_failovers_triggered", + "Number of requests where backup replica performed failover", + keeper_metrics.request_failovers_triggered.clone(), + ); + writable_registry.register( "request_duration_ms", "Time taken to process each successful callback request in milliseconds", @@ -297,6 +305,9 @@ impl KeeperMetrics { .requests_processed_failure .get_or_create(&account_label); let _ = self.requests_reprocessed.get_or_create(&account_label); + let _ = self + .request_failovers_triggered + .get_or_create(&account_label); let _ = self.reveals.get_or_create(&account_label); let _ = self.request_duration_ms.get_or_create(&account_label); let _ = self.retry_count.get_or_create(&account_label); diff --git a/apps/fortuna/src/keeper/process_event.rs b/apps/fortuna/src/keeper/process_event.rs index 646bc96204..25befd91dd 100644 --- a/apps/fortuna/src/keeper/process_event.rs +++ b/apps/fortuna/src/keeper/process_event.rs @@ -91,6 +91,15 @@ pub async fn process_event_with_backoff( ); } } + + let account_label = AccountLabel { + chain_id: chain_state.id.clone(), + address: chain_state.provider_address.to_string(), + }; + metrics + .request_failovers_triggered + .get_or_create(&account_label) + .inc(); } } From 191a09f73bb47013a3ed86573cfcab3d8ed84890 Mon Sep 17 00:00:00 2001 From: Tejas Badadare Date: Mon, 6 Oct 2025 14:42:50 -0700 Subject: [PATCH 2/2] chore: doc, version bump --- Cargo.lock | 2 +- apps/fortuna/Cargo.toml | 2 +- apps/fortuna/src/keeper/keeper_metrics.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 645c318842..d80dfcabd3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3094,7 +3094,7 @@ dependencies = [ [[package]] name = "fortuna" -version = "9.2.1" +version = "9.2.2" dependencies = [ "anyhow", "axum 0.6.20", diff --git a/apps/fortuna/Cargo.toml b/apps/fortuna/Cargo.toml index be76779b71..6fceb9492f 100644 --- a/apps/fortuna/Cargo.toml +++ b/apps/fortuna/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fortuna" -version = "9.2.1" +version = "9.2.2" edition = "2021" [lib] diff --git a/apps/fortuna/src/keeper/keeper_metrics.rs b/apps/fortuna/src/keeper/keeper_metrics.rs index d2d00cc5d2..373bf4e2b1 100644 --- a/apps/fortuna/src/keeper/keeper_metrics.rs +++ b/apps/fortuna/src/keeper/keeper_metrics.rs @@ -190,7 +190,7 @@ impl KeeperMetrics { writable_registry.register( "request_failovers_triggered", - "Number of requests where backup replica performed failover", + "Number of requests where backup replica attemped to fulfill the request", keeper_metrics.request_failovers_triggered.clone(), );