Skip to content

Commit e719256

Browse files
authored
When the first request fails, start subsequent ones in parallel with increasing delay. (#4913)
## Motivation We observed that when the first request failed, the failure was broadcast to all waiting peers. This slowed down the process and didn't make use of the alternative peers that we expect to have the data. ## Proposal Detect whether a request fails and, if so, try all alternative peers before returning an error. We spawn a retry operation for every alternative peer, each started after an ever-increasing delay (staggered in steps of `150ms` by default, configurable via `--alternative-peers-retry-delay-ms`). ## Test Plan CI (a test was added for this case). ## Release Plan - These changes should be backported to the latest `testnet` branch, then - be released in a new SDK. ## Links - [reviewer checklist](https://github.com/linera-io/linera-protocol/blob/main/CONTRIBUTING.md#reviewer-checklist)
1 parent 9d13e2a commit e719256

File tree

6 files changed

+425
-61
lines changed

6 files changed

+425
-61
lines changed

CLI.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,9 @@ Client implementation and command-line tool for the Linera blockchain
201201
* `--alpha <ALPHA>` — Smoothing factor for Exponential Moving Averages (0 < alpha < 1). Higher values give more weight to recent observations. Typical values are between 0.01 and 0.5. A value of 0.1 means that 10% of the new observation is considered and 90% of the previous average is retained
202202

203203
Default value: `0.1`
204+
* `--alternative-peers-retry-delay-ms <ALTERNATIVE_PEERS_RETRY_DELAY_MS>` — Delay in milliseconds between starting requests to different peers. This helps to stagger requests and avoid overwhelming the network
205+
206+
Default value: `150`
204207
* `--storage <STORAGE_CONFIG>` — Storage configuration for the blockchain history
205208
* `--storage-max-concurrent-queries <STORAGE_MAX_CONCURRENT_QUERIES>` — The maximal number of simultaneous queries to the database
206209
* `--storage-max-stream-queries <STORAGE_MAX_STREAM_QUERIES>` — The maximal number of simultaneous stream queries to the database

linera-client/src/client_options.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,15 @@ pub struct ClientContextOptions {
230230
env = "LINERA_REQUESTS_SCHEDULER_ALPHA"
231231
)]
232232
pub alpha: f64,
233+
234+
/// Delay in milliseconds between starting requests to different peers.
235+
/// This helps to stagger requests and avoid overwhelming the network.
236+
#[arg(
237+
long,
238+
default_value_t = linera_core::client::requests_scheduler::STAGGERED_DELAY_MS,
239+
env = "LINERA_REQUESTS_SCHEDULER_ALTERNATIVE_PEERS_RETRY_DELAY_MS"
240+
)]
241+
pub alternative_peers_retry_delay_ms: u64,
233242
}
234243

235244
impl ClientContextOptions {
@@ -273,6 +282,7 @@ impl ClientContextOptions {
273282
cache_max_size: self.cache_max_size,
274283
max_request_ttl_ms: self.max_request_ttl_ms,
275284
alpha: self.alpha,
285+
retry_delay_ms: self.alternative_peers_retry_delay_ms,
276286
}
277287
}
278288
}

linera-core/src/client/requests_scheduler/in_flight_tracker.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,21 @@ impl<N: Clone> InFlightTracker<N> {
173173
let peers = entry.alternative_peers.read().await;
174174
Some(peers.clone())
175175
}
176+
177+
/// Removes a specific peer from the alternative peers list.
178+
///
179+
/// # Arguments
180+
/// - `key`: The request key to look up
181+
/// - `peer`: The peer to remove from alternatives
182+
pub(super) async fn remove_alternative_peer(&self, key: &RequestKey, peer: &N)
183+
where
184+
N: PartialEq + Eq,
185+
{
186+
if let Some(entry) = self.entries.read().await.get(key) {
187+
let mut alt_peers = entry.alternative_peers.write().await;
188+
alt_peers.retain(|p| p != peer);
189+
}
190+
}
176191
}
177192

178193
/// Type of in-flight request match found.

linera-core/src/client/requests_scheduler/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ pub const CACHE_TTL_MS: u64 = 2000;
2121
pub const CACHE_MAX_SIZE: usize = 1000;
2222
pub const MAX_REQUEST_TTL_MS: u64 = 200;
2323
pub const ALPHA_SMOOTHING_FACTOR: f64 = 0.1;
24+
pub const STAGGERED_DELAY_MS: u64 = 150;
2425

2526
/// Configuration for the `RequestsScheduler`.
2627
#[derive(Debug, Clone)]
@@ -35,6 +36,8 @@ pub struct RequestsSchedulerConfig {
3536
pub max_request_ttl_ms: u64,
3637
/// Smoothing factor for Exponential Moving Averages (0 < alpha < 1)
3738
pub alpha: f64,
39+
/// Delay in milliseconds between starting requests to different peers.
40+
pub retry_delay_ms: u64,
3841
}
3942

4043
impl Default for RequestsSchedulerConfig {
@@ -45,6 +48,7 @@ impl Default for RequestsSchedulerConfig {
4548
cache_max_size: CACHE_MAX_SIZE,
4649
max_request_ttl_ms: MAX_REQUEST_TTL_MS,
4750
alpha: ALPHA_SMOOTHING_FACTOR,
51+
retry_delay_ms: STAGGERED_DELAY_MS,
4852
}
4953
}
5054
}

0 commit comments

Comments
 (0)