Skip to content

Commit 0a32fd7

Browse files
committed
add exponential backoff to cch action retry
Replaces the fixed 1-second retry delay with exponential backoff to reduce load during persistent failures and improve recovery behavior. Delays start at 1 second and double on each retry, capped at 10 minutes.
1 parent 7649b24 commit 0a32fd7

File tree

1 file changed

+27
-11
lines changed

1 file changed

+27
-11
lines changed

crates/fiber-lib/src/cch/actor.rs

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,15 @@ use crate::invoice::{CkbInvoice, Currency, InvoiceBuilder};
2929
use crate::time::{Duration, SystemTime, UNIX_EPOCH};
3030

3131
pub const DEFAULT_ORDER_EXPIRY_SECONDS: u64 = 86400; // 24 hours
32-
pub const ACTION_RETRY_INTERNAL_MILLIS: u64 = 1000; // 1s
32+
/// Delay before the first retry of a failed action, in milliseconds (1 second).
pub const ACTION_RETRY_BASE_MILLIS: u64 = 1000;
/// Upper bound for the delay between retries, in milliseconds (10 minutes).
pub const ACTION_RETRY_MAX_MILLIS: u64 = 600_000;

/// Returns the delay to wait before retry number `retry_count` (0-based).
///
/// The delay is `ACTION_RETRY_BASE_MILLIS * 2^retry_count`, clamped to
/// `ACTION_RETRY_MAX_MILLIS`. With the current constants this yields
/// 1s, 2s, 4s, ..., 512s and then stays at 600s for every later retry.
///
/// The function never panics: shift amounts of 64 or more and multiplication
/// overflow both resolve to the maximum delay.
fn calculate_retry_delay(retry_count: u32) -> Duration {
    let delay = 1u64
        // `checked_shl` yields `None` once the shift amount reaches the bit
        // width of `u64`; by then the cap has long been exceeded.
        .checked_shl(retry_count)
        .map_or(ACTION_RETRY_MAX_MILLIS, |factor| {
            ACTION_RETRY_BASE_MILLIS.saturating_mul(factor)
        })
        // Clamp to the cap itself rather than to the largest power-of-two
        // multiple below it, so the documented maximum is actually reached.
        .min(ACTION_RETRY_MAX_MILLIS);
    Duration::from_millis(delay)
}
3341

3442
#[derive(Clone, Debug, Deserialize)]
3543
pub struct SendBTC {
@@ -53,6 +61,7 @@ pub enum CchMessage {
5361
ExecuteAction {
5462
payment_hash: Hash256,
5563
action: CchOrderAction,
64+
retry_count: u32,
5665
},
5766

5867
/// Test-only message to insert an order directly into the database
@@ -216,24 +225,30 @@ impl Actor for CchActor {
216225
CchMessage::ExecuteAction {
217226
payment_hash,
218227
action,
228+
retry_count,
219229
} => {
220230
let order = match state.orders_db.get_cch_order(&payment_hash).await {
221231
Err(CchDbError::NotFound(_)) => return Ok(()),
222232
Err(err) => return Err(err.into()),
223233
Ok(order) => order,
224234
};
225235
if let Err(err) = ActionDispatcher::execute(state, &myself, &order, action).await {
226-
tracing::error!("failed to execute action {:?}: {}", action, err);
227-
// Retry the action later. The action executor will only
228-
// cease retrying if it handles the error internally and
229-
// returns OK.
230-
myself.send_after(
231-
Duration::from_millis(ACTION_RETRY_INTERNAL_MILLIS),
232-
move || CchMessage::ExecuteAction {
233-
payment_hash,
234-
action,
235-
},
236+
let delay = calculate_retry_delay(retry_count);
237+
tracing::error!(
238+
"failed to execute action {:?} (retry {}): {}, retrying in {:?}",
239+
action,
240+
retry_count,
241+
err,
242+
delay
236243
);
244+
// Retry the action later with exponential backoff. The action
245+
// executor will only cease retrying if it handles the error
246+
// internally and returns OK.
247+
myself.send_after(delay, move || CchMessage::ExecuteAction {
248+
payment_hash,
249+
action,
250+
retry_count: retry_count.saturating_add(1),
251+
});
237252
}
238253

239254
Ok(())
@@ -439,6 +454,7 @@ fn append_actions(
439454
myself.send_message(CchMessage::ExecuteAction {
440455
payment_hash,
441456
action,
457+
retry_count: 0,
442458
})?;
443459
}
444460
Ok(())

0 commit comments

Comments
 (0)