use std::time::Duration;

use tokio::time::Instant;

/// A latency predictor using a numerically stable, exponentially decayed linear regression.
///
/// We fit a model of the form:
///     duration_secs ≈ base_time_secs + size_bytes * inv_throughput
/// which is equivalent to:
///     duration_secs ≈ intercept + slope * size_bytes
///
/// Internally, we use a stable, online update method based on weighted means and covariances:
/// - mean_x, mean_y: weighted means of size and duration
/// - s_xx, s_xy: exponentially decayed sums of (x - mean_x)^2 and (x - mean_x)(y - mean_y)
///
/// We apply decay on each update using exp2(-elapsed / half_life).
///
/// This avoids numerical instability from large sums and is robust to shifting distributions.
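///
/// # Example
///
/// A minimal usage sketch (the import path is illustrative; adapt it to wherever this
/// type lives in your crate):
///
/// ```ignore
/// use mycrate::LatencyPredictor; // hypothetical path
/// use std::time::Duration;
///
/// let mut predictor = LatencyPredictor::new(Duration::from_secs(60));
///
/// // Feed observed transfers: size in bytes, duration, average concurrency.
/// predictor.update(1_000_000, Duration::from_millis(500), 1.0);
/// predictor.update(2_000_000, Duration::from_millis(900), 1.0);
///
/// // Estimate how long a 5 MB transfer would take with 2 concurrent connections.
/// let eta = predictor.predicted_latency(5_000_000, 2.0);
/// ```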
pub struct LatencyPredictor {
    sum_w: f64,
    mean_x: f64,
    mean_y: f64,
    s_xx: f64,
    s_xy: f64,

    base_time_secs: f64,
    inv_throughput: f64,
    decay_half_life_secs: f64,
    last_update: Instant,
}

impl LatencyPredictor {
    pub fn new(decay_half_life: Duration) -> Self {
        Self {
            sum_w: 0.0,
            mean_x: 0.0,
            mean_y: 0.0,
            s_xx: 0.0,
            s_xy: 0.0,
            base_time_secs: 120.0, // 2 minutes as a prior; overwritten on the first update.
            inv_throughput: 0.0,
            decay_half_life_secs: decay_half_life.as_secs_f64(),
            last_update: Instant::now(),
        }
    }

    /// Updates the latency model with a new observation.
    ///
    /// Applies exponential decay to prior statistics and incorporates the new sample
    /// using a numerically stable linear regression formula.
    ///
    /// - `size_bytes`: the size of the completed transmission, in bytes.
    /// - `duration`: the time taken to complete the transmission.
    /// - `avg_concurrent`: the average number of concurrent connections during the transfer.
    pub fn update(&mut self, size_bytes: usize, duration: Duration, avg_concurrent: f64) {
        let now = Instant::now();
        let elapsed = now.duration_since(self.last_update).as_secs_f64();
        let decay = (-elapsed / self.decay_half_life_secs).exp2();

        // Feature x: the effective number of bytes contending for the link during this
        // transfer. Scaling by the average concurrency treats concurrent connections as
        // if their combined bytes were sent over a single shared link.
        let x = (size_bytes as f64) * avg_concurrent.max(1.);

        // Target y: the observed duration in seconds, clamped away from zero.
        let y = duration.as_secs_f64().max(1e-6);
        // Decay previous statistics
        self.sum_w *= decay;
        self.s_xx *= decay;
        self.s_xy *= decay;
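        // e.g. when `elapsed` equals the half-life, decay = 2^(-1) = 0.5, so every prior
        // observation loses exactly half its weight.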

        // Update means with a numerically stable method
        let weight = 1.0;
        let new_sum_w = self.sum_w + weight;
        let delta_x = x - self.mean_x;
        let delta_y = y - self.mean_y;

        let mean_x_new = self.mean_x + (weight * delta_x) / new_sum_w;
        let mean_y_new = self.mean_y + (weight * delta_y) / new_sum_w;

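        // Welford-style cross-term updates: pairing the pre-update delta (delta_x) with
        // the post-update residual (x - mean_x_new) keeps the decayed sums exact as
        // samples stream in.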
        self.s_xx += weight * delta_x * (x - mean_x_new);
        self.s_xy += weight * delta_x * (y - mean_y_new);

        self.mean_x = mean_x_new;
        self.mean_y = mean_y_new;
        self.sum_w = new_sum_w;

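        // With enough variance in x, recover the weighted least-squares fit; otherwise
        // fall back to predicting the mean observed duration.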
        if self.s_xx > 1e-8 {
            let slope = self.s_xy / self.s_xx;
            let intercept = self.mean_y - slope * self.mean_x;

            self.base_time_secs = intercept;
            self.inv_throughput = slope;
        } else {
            self.base_time_secs = self.mean_y;
            self.inv_throughput = 0.0;
        }

        self.last_update = now;
    }

    /// Predicts the expected completion time for a given transfer size and concurrency level.
    ///
    /// First predicts the latency the transfer would have on its own, then scales that
    /// estimate by the concurrency level to reflect that concurrent connections share the
    /// same throughput.
    ///
    /// For example, with a fitted base time of 0.1 s and inverse throughput of 1e-8 s/byte,
    /// a 10 MB transfer at concurrency 2 predicts (0.1 + 1e7 * 1e-8) * 2 = 0.4 s.
    ///
    /// - `size_bytes`: the size of the transfer, in bytes.
    /// - `avg_concurrent`: the average number of concurrent connections.
    pub fn predicted_latency(&self, size_bytes: u64, avg_concurrent: f64) -> Duration {
        let predicted_secs_without_concurrency =
            self.base_time_secs + size_bytes as f64 * self.inv_throughput;
        let predicted_secs = predicted_secs_without_concurrency * avg_concurrent.max(1.);
        // Clamp at zero: a fitted negative intercept could otherwise make this negative,
        // and Duration::from_secs_f64 panics on negative input.
        Duration::from_secs_f64(predicted_secs.max(0.0))
    }

    /// Estimates the currently achievable bandwidth, in bytes per second, by asking the
    /// model how long a nominal 10 MiB transfer would take on a single connection.
    pub fn predicted_bandwidth(&self) -> f64 {
        let query_bytes: u64 = 10 * 1024 * 1024;

        // How long the model predicts this would take at full (single-connection) bandwidth.
        let min_latency = self.predicted_latency(query_bytes, 1.);

        // Report bytes per second under this model.
        query_bytes as f64 / min_latency.as_secs_f64().max(1e-6)
    }
}

#[cfg(test)]
mod tests {
    use tokio::time::{self, Duration as TokioDuration};

    use super::*;

    #[test]
    fn test_estimator_update() {
        let mut estimator = LatencyPredictor::new(Duration::from_secs_f64(10.0));
        estimator.update(1_000_000, Duration::from_millis(500), 1.);
        let predicted = estimator.predicted_latency(1_000_000, 1.);
        assert!(predicted.as_secs_f64() > 0.0);
    }

    #[test]
    fn test_converges_to_constant_observation() {
        let mut predictor = LatencyPredictor::new(Duration::from_secs_f64(10.0));
        for _ in 0..10 {
            predictor.update(1000, Duration::from_secs_f64(1.0), 1.);
        }
        let prediction = predictor.predicted_latency(1000, 1.);
        assert!((prediction.as_secs_f64() - 1.0).abs() < 0.01);
    }

    #[tokio::test]
    async fn test_decay_weighting_effect() {
        time::pause();
        let mut predictor = LatencyPredictor::new(Duration::from_secs_f64(2.0));
        predictor.update(1000, Duration::from_secs_f64(2.0), 1.);
        time::advance(TokioDuration::from_secs(2)).await;
        predictor.update(1000, Duration::from_secs_f64(1.0), 1.);
        // After one half-life the first sample's weight decays to 0.5, so the weighted
        // mean duration is (0.5 * 2.0 + 1.0 * 1.0) / 1.5 ≈ 1.33 s.
        let predicted = predictor.predicted_latency(1000, 1.).as_secs_f64();
        assert!(predicted > 1.0 && predicted < 1.6);
    }

    #[test]
    fn test_scaling_with_concurrency() {
        let mut predictor = LatencyPredictor::new(Duration::from_secs_f64(10.0));
        for _ in 0..10 {
            predictor.update(1000, Duration::from_secs_f64(1.0), 1.);
        }
        let predicted_1 = predictor.predicted_latency(1000, 1.).as_secs_f64();
        let predicted_2 = predictor.predicted_latency(1000, 2.).as_secs_f64();
        let predicted_4 = predictor.predicted_latency(1000, 4.).as_secs_f64();
        assert!(predicted_2 > predicted_1);
        assert!(predicted_4 > predicted_2);
    }
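
    #[test]
    fn test_predicted_bandwidth_matches_observations() {
        // A sanity-check sketch (not part of the original tests): after observing
        // transfers at roughly 1 MB/s, the predicted bandwidth should be of that order.
        let mut predictor = LatencyPredictor::new(Duration::from_secs_f64(10.0));
        predictor.update(1_000_000, Duration::from_secs_f64(1.0), 1.);
        predictor.update(2_000_000, Duration::from_secs_f64(2.0), 1.);
        let bw = predictor.predicted_bandwidth();
        assert!(bw > 100_000.0 && bw < 100_000_000.0);
    }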
}