Skip to content

Commit 78231b8

Browse files
Add request_failovers_triggered metric to Fortuna
This metric tracks when backup replicas perform a failover to fulfill requests that the primary replica failed to handle. It will be used for alerting when primary instances are not working correctly.

Co-Authored-By: Tejas Badadare <[email protected]>
1 parent 6248709 commit 78231b8

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

apps/fortuna/src/keeper/keeper_metrics.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ pub struct KeeperMetrics {
3535
pub requests_processed_success: Family<AccountLabel, Counter>,
3636
pub requests_processed_failure: Family<AccountLabel, Counter>,
3737
pub requests_reprocessed: Family<AccountLabel, Counter>,
38+
pub request_failovers_triggered: Family<AccountLabel, Counter>,
3839
pub reveals: Family<AccountLabel, Counter>,
3940
pub request_duration_ms: Family<AccountLabel, Histogram>,
4041
pub retry_count: Family<AccountLabel, Histogram>,
@@ -66,6 +67,7 @@ impl Default for KeeperMetrics {
6667
requests_processed_success: Family::default(),
6768
requests_processed_failure: Family::default(),
6869
requests_reprocessed: Family::default(),
70+
request_failovers_triggered: Family::default(),
6971
reveals: Family::default(),
7072
request_duration_ms: Family::new_with_constructor(|| {
7173
Histogram::new(vec![
@@ -186,6 +188,12 @@ impl KeeperMetrics {
186188
keeper_metrics.requests_reprocessed.clone(),
187189
);
188190

191+
writable_registry.register(
192+
"request_failovers_triggered",
193+
"Number of requests where backup replica performed failover",
194+
keeper_metrics.request_failovers_triggered.clone(),
195+
);
196+
189197
writable_registry.register(
190198
"request_duration_ms",
191199
"Time taken to process each successful callback request in milliseconds",
@@ -297,6 +305,9 @@ impl KeeperMetrics {
297305
.requests_processed_failure
298306
.get_or_create(&account_label);
299307
let _ = self.requests_reprocessed.get_or_create(&account_label);
308+
let _ = self
309+
.request_failovers_triggered
310+
.get_or_create(&account_label);
300311
let _ = self.reveals.get_or_create(&account_label);
301312
let _ = self.request_duration_ms.get_or_create(&account_label);
302313
let _ = self.retry_count.get_or_create(&account_label);

apps/fortuna/src/keeper/process_event.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,15 @@ pub async fn process_event_with_backoff(
9191
);
9292
}
9393
}
94+
95+
let account_label = AccountLabel {
96+
chain_id: chain_state.id.clone(),
97+
address: chain_state.provider_address.to_string(),
98+
};
99+
metrics
100+
.request_failovers_triggered
101+
.get_or_create(&account_label)
102+
.inc();
94103
}
95104
}
96105

0 commit comments

Comments
 (0)