Skip to content

Commit 8b016be

Browse files
authored
Remove semaphore from RequestsScheduler (#4869)
## Motivation The semaphore might lead to deadlocks if used incorrectly, and we don't yet know whether its usage here is an improvement. ## Proposal Remove the semaphore and track only the number of current in-flight requests. ## Test Plan CI ## Release Plan - Nothing to do / These changes follow the usual release cycle. ## Links - [reviewer checklist](https://github.com/linera-io/linera-protocol/blob/main/CONTRIBUTING.md#reviewer-checklist)
1 parent 01c2119 commit 8b016be

File tree

6 files changed

+4
-223
lines changed

6 files changed

+4
-223
lines changed

CLI.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,6 @@ Client implementation and command-line tool for the Linera blockchain
185185
Default value: `20000`
186186
* `--max-joined-tasks <MAX_JOINED_TASKS>` — Maximum number of tasks that can be joined concurrently in the client
187187

188-
Default value: `100`
189-
* `--max-in-flight-requests <MAX_IN_FLIGHT_REQUESTS>` — Maximum concurrent requests per validator node
190-
191188
Default value: `100`
192189
* `--max-accepted-latency-ms <MAX_ACCEPTED_LATENCY_MS>` — Maximum expected latency in milliseconds for score normalization
193190

linera-client/src/client_options.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -187,14 +187,6 @@ pub struct ClientContextOptions {
187187
#[arg(long, default_value = "100")]
188188
pub max_joined_tasks: usize,
189189

190-
/// Maximum concurrent requests per validator node.
191-
#[arg(
192-
long,
193-
default_value_t = linera_core::client::requests_scheduler::MAX_IN_FLIGHT_REQUESTS,
194-
env = "LINERA_REQUESTS_SCHEDULER_MAX_IN_FLIGHT_REQUESTS"
195-
)]
196-
pub max_in_flight_requests: usize,
197-
198190
/// Maximum expected latency in milliseconds for score normalization.
199191
#[arg(
200192
long,
@@ -276,7 +268,6 @@ impl ClientContextOptions {
276268
&self,
277269
) -> linera_core::client::RequestsSchedulerConfig {
278270
linera_core::client::RequestsSchedulerConfig {
279-
max_in_flight_requests: self.max_in_flight_requests,
280271
max_accepted_latency_ms: self.max_accepted_latency_ms,
281272
cache_ttl_ms: self.cache_ttl_ms,
282273
cache_max_size: self.cache_max_size,

linera-core/src/client/requests_scheduler/mod.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ pub const ALPHA_SMOOTHING_FACTOR: f64 = 0.1;
2525
/// Configuration for the `RequestsScheduler`.
2626
#[derive(Debug, Clone)]
2727
pub struct RequestsSchedulerConfig {
28-
/// Maximum concurrent requests per validator node
29-
pub max_in_flight_requests: usize,
3028
/// Maximum expected latency in milliseconds for score normalization
3129
pub max_accepted_latency_ms: f64,
3230
/// Time-to-live for cached responses in milliseconds
@@ -42,7 +40,6 @@ pub struct RequestsSchedulerConfig {
4240
impl Default for RequestsSchedulerConfig {
4341
fn default() -> Self {
4442
Self {
45-
max_in_flight_requests: MAX_IN_FLIGHT_REQUESTS,
4643
max_accepted_latency_ms: MAX_ACCEPTED_LATENCY_MS,
4744
cache_ttl_ms: CACHE_TTL_MS,
4845
cache_max_size: CACHE_MAX_SIZE,

linera-core/src/client/requests_scheduler/node_info.rs

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
// Copyright (c) Zefchain Labs, Inc.
22
// SPDX-License-Identifier: Apache-2.0
33

4-
use std::sync::Arc;
5-
64
use custom_debug_derive::Debug;
7-
use tokio::sync::Semaphore;
85

96
use super::scoring::ScoringWeights;
107
use crate::{environment::Environment, remote_node::RemoteNode};
@@ -20,10 +17,6 @@ pub(super) struct NodeInfo<Env: Environment> {
2017
/// The underlying validator node connection
2118
pub(super) node: RemoteNode<Env::ValidatorNode>,
2219

23-
/// Semaphore to limit concurrent in-flight requests.
24-
/// It's created with a limit set to `max_in_flight` from configuration.
25-
pub(super) in_flight_semaphore: Arc<Semaphore>,
26-
2720
/// Exponential Moving Average of latency in milliseconds
2821
/// Adapts quickly to changes in response time
2922
ema_latency_ms: f64,
@@ -44,9 +37,6 @@ pub(super) struct NodeInfo<Env: Environment> {
4437

4538
/// Maximum expected latency in milliseconds for score normalization
4639
max_expected_latency_ms: f64,
47-
48-
/// Maximum expected in-flight requests for score normalization
49-
max_in_flight: usize,
5040
}
5141

5242
impl<Env: Environment> NodeInfo<Env> {
@@ -56,19 +46,16 @@ impl<Env: Environment> NodeInfo<Env> {
5646
weights: ScoringWeights,
5747
alpha: f64,
5848
max_expected_latency_ms: f64,
59-
max_in_flight: usize,
6049
) -> Self {
6150
assert!(alpha > 0.0 && alpha < 1.0, "Alpha must be in (0, 1) range");
6251
Self {
6352
node,
6453
ema_latency_ms: 100.0, // Start with reasonable latency expectation
6554
ema_success_rate: 1.0, // Start optimistically with 100% success
66-
in_flight_semaphore: Arc::new(tokio::sync::Semaphore::new(max_in_flight)),
6755
total_requests: 0,
6856
weights,
6957
alpha,
7058
max_expected_latency_ms,
71-
max_in_flight,
7259
}
7360
}
7461

@@ -89,19 +76,12 @@ impl<Env: Environment> NodeInfo<Env> {
8976
// 2. Success Rate is already normalized [0, 1]
9077
let success_score = self.ema_success_rate;
9178

92-
// 3. Normalize Load (lower is better, so we invert)
93-
let current_load =
94-
(self.max_in_flight as f64) - (self.in_flight_semaphore.available_permits() as f64);
95-
let load_score =
96-
1.0 - (current_load.min(self.max_in_flight as f64) / self.max_in_flight as f64);
97-
9879
// 4. Apply cold-start penalty for nodes with very few requests
9980
let confidence_factor = (self.total_requests as f64 / 10.0).min(1.0);
10081

10182
// 5. Combine with weights
102-
let raw_score = (self.weights.latency * latency_score)
103-
+ (self.weights.success * success_score)
104-
+ (self.weights.load * load_score);
83+
let raw_score =
84+
(self.weights.latency * latency_score) + (self.weights.success * success_score);
10585

10686
// Apply confidence factor to penalize nodes with too few samples
10787
raw_score * (0.5 + 0.5 * confidence_factor)

linera-core/src/client/requests_scheduler/scheduler.rs

Lines changed: 2 additions & 185 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,6 @@ pub struct RequestsScheduler<Env: Environment> {
125125
/// Thread-safe map of validator nodes indexed by their public keys.
126126
/// Each node is wrapped with EMA-based performance tracking information.
127127
nodes: Arc<tokio::sync::RwLock<BTreeMap<ValidatorPublicKey, NodeInfo<Env>>>>,
128-
/// Maximum number of concurrent requests allowed per node.
129-
/// Prevents overwhelming individual validators with too many parallel requests.
130-
max_requests_per_node: usize,
131128
/// Default scoring weights applied to new nodes.
132129
weights: ScoringWeights,
133130
/// Default EMA smoothing factor for new nodes.
@@ -148,7 +145,6 @@ impl<Env: Environment> RequestsScheduler<Env> {
148145
) -> Self {
149146
Self::with_config(
150147
nodes,
151-
config.max_in_flight_requests,
152148
ScoringWeights::default(),
153149
config.alpha,
154150
config.max_accepted_latency_ms,
@@ -169,10 +165,8 @@ impl<Env: Environment> RequestsScheduler<Env> {
169165
/// - `cache_ttl`: Time-to-live for cached responses
170166
/// - `max_cache_size`: Maximum number of entries in the cache
171167
/// - `max_request_ttl`: Maximum latency for an in-flight request before we stop deduplicating it
172-
#[expect(clippy::too_many_arguments)]
173168
pub fn with_config(
174169
nodes: impl IntoIterator<Item = RemoteNode<Env::ValidatorNode>>,
175-
max_requests_per_node: usize,
176170
weights: ScoringWeights,
177171
alpha: f64,
178172
max_expected_latency_ms: f64,
@@ -188,18 +182,11 @@ impl<Env: Environment> RequestsScheduler<Env> {
188182
.map(|node| {
189183
(
190184
node.public_key,
191-
NodeInfo::with_config(
192-
node,
193-
weights,
194-
alpha,
195-
max_expected_latency_ms,
196-
max_requests_per_node,
197-
),
185+
NodeInfo::with_config(node, weights, alpha, max_expected_latency_ms),
198186
)
199187
})
200188
.collect(),
201189
)),
202-
max_requests_per_node,
203190
weights,
204191
alpha,
205192
max_expected_latency: max_expected_latency_ms,
@@ -474,19 +461,11 @@ impl<Env: Environment> RequestsScheduler<Env> {
474461
let start_time = Instant::now();
475462
let public_key = peer.public_key;
476463

477-
// Acquire request slot
478-
let nodes = self.nodes.read().await;
479-
let node = nodes.get(&public_key).expect("Node must exist");
480-
let semaphore = node.in_flight_semaphore.clone();
481-
let permit = semaphore.acquire().await.unwrap();
482-
drop(nodes);
483-
484464
// Execute the operation
485465
let result = operation(peer).await;
486466

487467
// Update metrics and release slot
488468
let response_time_ms = start_time.elapsed().as_millis() as u64;
489-
drop(permit); // Explicitly drop the permit to release the slot
490469
let is_success = result.is_ok();
491470
{
492471
let mut nodes = self.nodes.write().await;
@@ -752,13 +731,7 @@ impl<Env: Environment> RequestsScheduler<Env> {
752731
let mut nodes = self.nodes.write().await;
753732
let public_key = node.public_key;
754733
nodes.entry(public_key).or_insert_with(|| {
755-
NodeInfo::with_config(
756-
node,
757-
self.weights,
758-
self.alpha,
759-
self.max_expected_latency,
760-
self.max_requests_per_node,
761-
)
734+
NodeInfo::with_config(node, self.weights, self.alpha, self.max_expected_latency)
762735
});
763736
}
764737
}
@@ -791,7 +764,6 @@ mod tests {
791764
) -> Arc<RequestsScheduler<TestEnvironment>> {
792765
let mut manager = RequestsScheduler::with_config(
793766
vec![], // No actual nodes needed for these tests
794-
10,
795767
ScoringWeights::default(),
796768
0.1,
797769
1000.0,
@@ -1020,160 +992,6 @@ mod tests {
1020992
assert_eq!(execution_count.load(Ordering::SeqCst), 2);
1021993
}
1022994

1023-
#[tokio::test]
1024-
async fn test_slot_limiting_blocks_excess_requests() {
1025-
// Tests the slot limiting mechanism:
1026-
// - Creates a RequestsScheduler with max_requests_per_node = 2
1027-
// - Starts two slow requests that acquire both available slots
1028-
// - Starts a third request and verifies it's blocked waiting for a slot (execution count stays at 2)
1029-
// - Completes the first request to release a slot
1030-
// - Verifies the third request now acquires the freed slot and executes (execution count becomes 3)
1031-
// - Confirms all requests complete successfully
1032-
use linera_base::identifiers::BlobType;
1033-
1034-
use crate::test_utils::{MemoryStorageBuilder, TestBuilder};
1035-
1036-
// Create a test environment with one validator
1037-
let mut builder = TestBuilder::new(
1038-
MemoryStorageBuilder::default(),
1039-
1,
1040-
0,
1041-
InMemorySigner::new(None),
1042-
)
1043-
.await
1044-
.unwrap();
1045-
1046-
// Get the validator node
1047-
let validator_node = builder.node(0);
1048-
let validator_public_key = validator_node.name();
1049-
1050-
// Create a RemoteNode wrapper
1051-
let remote_node = RemoteNode {
1052-
public_key: validator_public_key,
1053-
node: validator_node,
1054-
};
1055-
1056-
// Create a RequestsScheduler with max_requests_per_node = 2
1057-
let max_slots = 2;
1058-
let mut manager: RequestsScheduler<TestEnvironment> = RequestsScheduler::with_config(
1059-
vec![remote_node.clone()],
1060-
max_slots,
1061-
ScoringWeights::default(),
1062-
0.1,
1063-
1000.0,
1064-
Duration::from_secs(60),
1065-
100,
1066-
Duration::from_secs(60),
1067-
);
1068-
// Replace the tracker with one using a longer timeout for this test
1069-
manager.in_flight_tracker = InFlightTracker::new(Duration::from_secs(60));
1070-
let manager = Arc::new(manager);
1071-
1072-
// Track execution state
1073-
let execution_count = Arc::new(AtomicUsize::new(0));
1074-
let completion_count = Arc::new(AtomicUsize::new(0));
1075-
1076-
// Create channels to control when operations complete
1077-
let (tx1, rx1) = oneshot::channel();
1078-
let (tx2, rx2) = oneshot::channel();
1079-
1080-
// Start first request using with_peer (will block until signaled)
1081-
let manager_clone1 = Arc::clone(&manager);
1082-
let remote_node_clone1 = remote_node.clone();
1083-
let execution_count_clone1 = execution_count.clone();
1084-
let completion_count_clone1 = completion_count.clone();
1085-
let key1 = RequestKey::Blob(BlobId::new(CryptoHash::test_hash("blob1"), BlobType::Data));
1086-
1087-
let first_request = tokio::spawn(async move {
1088-
manager_clone1
1089-
.with_peer(key1, remote_node_clone1, |_peer| async move {
1090-
execution_count_clone1.fetch_add(1, Ordering::SeqCst);
1091-
// Simulate work by waiting for signal
1092-
let _ = rx1.await;
1093-
completion_count_clone1.fetch_add(1, Ordering::SeqCst);
1094-
Ok(None) // Return Option<Blob>
1095-
})
1096-
.await
1097-
});
1098-
1099-
// Give first request time to start and acquire a slot
1100-
tokio::time::sleep(Duration::from_millis(50)).await;
1101-
assert_eq!(execution_count.load(Ordering::SeqCst), 1);
1102-
1103-
// Start second request using with_peer (will block until signaled)
1104-
let manager_clone2 = Arc::clone(&manager);
1105-
let remote_node_clone2 = remote_node.clone();
1106-
let execution_count_clone2 = execution_count.clone();
1107-
let completion_count_clone2 = completion_count.clone();
1108-
let key2 = RequestKey::Blob(BlobId::new(CryptoHash::test_hash("blob2"), BlobType::Data));
1109-
1110-
let second_request = tokio::spawn(async move {
1111-
manager_clone2
1112-
.with_peer(key2, remote_node_clone2, |_peer| async move {
1113-
execution_count_clone2.fetch_add(1, Ordering::SeqCst);
1114-
// Simulate work by waiting for signal
1115-
let _ = rx2.await;
1116-
completion_count_clone2.fetch_add(1, Ordering::SeqCst);
1117-
Ok(None) // Return Option<Blob>
1118-
})
1119-
.await
1120-
});
1121-
1122-
// Give second request time to start and acquire the second slot
1123-
tokio::time::sleep(Duration::from_millis(50)).await;
1124-
assert_eq!(execution_count.load(Ordering::SeqCst), 2);
1125-
1126-
// Start third request - this should be blocked waiting for a slot
1127-
let remote_node_clone3 = remote_node.clone();
1128-
let execution_count_clone3 = execution_count.clone();
1129-
let completion_count_clone3 = completion_count.clone();
1130-
let key3 = RequestKey::Blob(BlobId::new(CryptoHash::test_hash("blob3"), BlobType::Data));
1131-
1132-
let third_request = tokio::spawn(async move {
1133-
manager
1134-
.with_peer(key3, remote_node_clone3, |_peer| async move {
1135-
execution_count_clone3.fetch_add(1, Ordering::SeqCst);
1136-
completion_count_clone3.fetch_add(1, Ordering::SeqCst);
1137-
Ok(None) // Return Option<Blob>
1138-
})
1139-
.await
1140-
});
1141-
1142-
// Give third request time to try acquiring a slot
1143-
tokio::time::sleep(Duration::from_millis(100)).await;
1144-
1145-
// Third request should still be waiting (not executed yet)
1146-
assert_eq!(
1147-
execution_count.load(Ordering::SeqCst),
1148-
2,
1149-
"Third request should be waiting for a slot"
1150-
);
1151-
1152-
// Complete the first request to release a slot
1153-
tx1.send(()).unwrap();
1154-
1155-
// Wait for first request to complete and third request to start
1156-
tokio::time::sleep(Duration::from_millis(100)).await;
1157-
1158-
// Now the third request should have acquired the freed slot and started executing
1159-
assert_eq!(
1160-
execution_count.load(Ordering::SeqCst),
1161-
3,
1162-
"Third request should now be executing"
1163-
);
1164-
1165-
// Complete remaining requests
1166-
tx2.send(()).unwrap();
1167-
1168-
// Wait for all requests to complete
1169-
let _result1 = first_request.await.unwrap();
1170-
let _result2 = second_request.await.unwrap();
1171-
let _result3 = third_request.await.unwrap();
1172-
1173-
// Verify all completed
1174-
assert_eq!(completion_count.load(Ordering::SeqCst), 3);
1175-
}
1176-
1177995
#[tokio::test]
1178996
async fn test_alternative_peers_registered_on_deduplication() {
1179997
use linera_base::identifiers::BlobType;
@@ -1203,7 +1021,6 @@ mod tests {
12031021
let manager: Arc<RequestsScheduler<TestEnvironment>> =
12041022
Arc::new(RequestsScheduler::with_config(
12051023
nodes.clone(),
1206-
1,
12071024
ScoringWeights::default(),
12081025
0.1,
12091026
1000.0,

linera-web/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ pub const OPTIONS: ClientContextOptions = ClientContextOptions {
9393
sender_chain_worker_ttl: Duration::from_millis(200),
9494
grace_period: linera_core::DEFAULT_GRACE_PERIOD,
9595
max_joined_tasks: 100,
96-
max_in_flight_requests: linera_core::client::requests_scheduler::MAX_IN_FLIGHT_REQUESTS,
9796
max_accepted_latency_ms: linera_core::client::requests_scheduler::MAX_ACCEPTED_LATENCY_MS,
9897
cache_ttl_ms: linera_core::client::requests_scheduler::CACHE_TTL_MS,
9998
cache_max_size: linera_core::client::requests_scheduler::CACHE_MAX_SIZE,

0 commit comments

Comments (0)