
Commit c94adbf

cache increment (#959)
* tmp
* fixes
* comment
* key name
* fixes
* comment
1 parent 47d339a commit c94adbf

File tree: 7 files changed, +138 -104 lines changed

app-server/src/cache/in_memory.rs

Lines changed: 16 additions & 0 deletions
@@ -52,4 +52,20 @@ impl CacheTrait for InMemoryCache {
         });
         Ok(())
     }
+
+    async fn increment(&self, key: &str, amount: i64) -> Result<Option<i64>, CacheError> {
+        // Note: This is not truly atomic for in-memory cache, but should be fine for dev/testing.
+        // Production should use Redis where increment is atomic.
+        // Like Redis INCRBY, this creates the key with value=0 if it doesn't exist
+        let current_value: i64 = match self.cache.get(key).await {
+            Some(bytes) => serde_json::from_slice(&bytes).map_err(|e| CacheError::SerDeError(e))?,
+            None => 0,
+        };
+
+        let new_value = current_value + amount;
+        let new_bytes = serde_json::to_vec(&new_value).map_err(|e| CacheError::SerDeError(e))?;
+
+        self.cache.insert(String::from(key), new_bytes).await;
+        Ok(Some(new_value))
+    }
 }

app-server/src/cache/keys.rs

Lines changed: 1 addition & 1 deletion
@@ -7,4 +7,4 @@ pub const PROJECT_CACHE_KEY: &str = "project";
 pub const WORKSPACE_LIMITS_CACHE_KEY: &str = "workspace_limits";
 pub const PROJECT_EVALUATORS_BY_PATH_CACHE_KEY: &str = "project_evaluators_by_path";
 
-pub const WORKSPACE_PARTIAL_USAGE_CACHE_KEY: &str = "workspace_partial_usage";
+pub const WORKSPACE_BYTES_USAGE_CACHE_KEY: &str = "workspace_bytes_usage";

app-server/src/cache/mod.rs

Lines changed: 6 additions & 0 deletions
@@ -32,4 +32,10 @@ pub trait CacheTrait {
         T: Serialize + Send;
     async fn remove(&self, key: &str) -> Result<(), CacheError>;
     async fn set_ttl(&self, key: &str, seconds: u64) -> Result<(), CacheError>;
+    /// Atomically increment a numeric value by the given amount.
+    /// If the key doesn't exist, it will be created with value 0 before incrementing.
+    /// Returns the new value after incrementing.
+    /// Callers should use get() first if they need to distinguish between missing keys
+    /// and existing keys (to trigger recomputation logic, for example).
+    async fn increment(&self, key: &str, amount: i64) -> Result<Option<i64>, CacheError>;
 }
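For context on the trait addition above, here is a minimal caller-side sketch of the get-then-increment pattern the doc comment recommends. The helper name `bump_usage` and its generic bound are illustrative assumptions, not code from this commit; it also assumes the generic `get::<T>()` that the call sites later in this diff use.

// Hypothetical helper (not in this commit) showing the suggested get-then-increment pattern.
async fn bump_usage<C: CacheTrait>(
    cache: &C,
    key: &str,
    written_bytes: i64,
) -> Result<Option<i64>, CacheError> {
    match cache.get::<i64>(key).await {
        // Counter already exists: increment atomically and return the new total.
        Ok(Some(_)) => cache.increment(key, written_bytes).await,
        // Missing key or cache error: return None so the caller can recompute from the source of truth.
        _ => Ok(None),
    }
}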

app-server/src/cache/redis.rs

Lines changed: 14 additions & 0 deletions
@@ -100,4 +100,18 @@ impl CacheTrait for RedisCache {
             Ok(())
         }
     }
+
+    async fn increment(&self, key: &str, amount: i64) -> Result<Option<i64>, CacheError> {
+        // Use atomic INCRBY command
+        // Note: Redis INCRBY will create the key if it doesn't exist, starting from 0
+        // The caller should check with get() first if they want to handle missing keys differently
+        let result: RedisResult<i64> = self.connection.clone().incr(key, amount).await;
+        match result {
+            Ok(new_value) => Ok(Some(new_value)),
+            Err(e) => {
+                log::error!("Redis increment error: {}", e);
+                Err(CacheError::InternalError(anyhow::Error::from(e)))
+            }
+        }
+    }
 }

app-server/src/ch/limits.rs

Lines changed: 5 additions & 18 deletions
@@ -3,8 +3,6 @@ use chrono::{DateTime, Months, Utc};
 use clickhouse::Client;
 use uuid::Uuid;
 
-use crate::db::stats::WorkspaceLimitsExceeded;
-
 /// Calculate how many complete months have elapsed from start_date to end_date
 /// This mimics Python's dateutil.relativedelta behavior
 fn complete_months_elapsed(start_date: DateTime<Utc>, end_date: DateTime<Utc>) -> u32 {
@@ -23,12 +21,11 @@ fn complete_months_elapsed(start_date: DateTime<Utc>, end_date: DateTime<Utc>) -> u32 {
     months_elapsed
 }
 
-pub async fn is_workspace_over_limit(
+pub async fn get_workspace_bytes_ingested_by_project_ids(
     clickhouse: Client,
     project_ids: Vec<Uuid>,
     reset_time: DateTime<Utc>,
-    bytes_limit: i64,
-) -> Result<WorkspaceLimitsExceeded> {
+) -> Result<usize> {
     let now = Utc::now();
     let months_elapsed = complete_months_elapsed(reset_time, now);
 
@@ -41,6 +38,7 @@ pub async fn get_workspace_bytes_ingested_by_project_ids(
     } else {
         reset_time
     };
+
     let query = "WITH spans_bytes_ingested AS (
         SELECT
             SUM(spans.size_bytes) as spans_bytes_ingested
@@ -67,25 +65,14 @@ pub async fn get_workspace_bytes_ingested_by_project_ids(
         FROM spans_bytes_ingested, browser_session_events_bytes_ingested, events_bytes_ingested
     ";
 
-    let total_bytes_ingested = clickhouse
+    let result = clickhouse
        .query(&query)
        .param("project_ids", project_ids)
        .param("latest_reset_time", latest_reset_time.naive_utc())
        .fetch_optional::<usize>()
        .await?;
 
-    let Some(bytes_ingested) = total_bytes_ingested else {
-        log::error!("No bytes ingested found for workspace in ClickHouse");
-        return Ok(WorkspaceLimitsExceeded {
-            steps: false,
-            bytes_ingested: false,
-        });
-    };
-
-    Ok(WorkspaceLimitsExceeded {
-        bytes_ingested: bytes_ingested > (bytes_limit.abs() as usize),
-        steps: false,
-    })
+    Ok(result.unwrap_or(0))
 }
 
 #[cfg(test)]
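To make the billing-period arithmetic concrete: the count returned by complete_months_elapsed is added back onto reset_time to form latest_reset_time, so the query above only counts bytes ingested in the current period. A minimal sketch, assuming chrono's checked_add_months; the helper itself is illustrative and not code from this commit:

// Illustrative only: roll reset_time forward by the number of complete months elapsed.
use chrono::{DateTime, Months, Utc};

fn latest_reset_time(reset_time: DateTime<Utc>, months_elapsed: u32) -> DateTime<Utc> {
    if months_elapsed > 0 {
        reset_time
            .checked_add_months(Months::new(months_elapsed))
            .unwrap_or(reset_time)
    } else {
        reset_time
    }
}
// Example: a reset_time of 2025-01-15 with 3 complete months elapsed yields 2025-04-15.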

app-server/src/traces/consumer.rs

Lines changed: 4 additions & 5 deletions
@@ -149,7 +149,6 @@ async fn process_spans_and_events_batch(
 ) {
     let mut all_spans = Vec::new();
     let mut all_events = Vec::new();
-    let mut project_ids = Vec::new();
     let mut spans_ingested_bytes = Vec::new();
 
     // Process all spans in parallel (heavy processing)
@@ -173,7 +172,6 @@ async fn process_spans_and_events_batch(
     // Collect results from parallel processing
     for (span, events, ingested_bytes) in processing_results {
         spans_ingested_bytes.push(ingested_bytes.clone());
-        project_ids.push(span.project_id);
         all_spans.push(span);
         all_events.extend(events.into_iter());
     }
@@ -206,7 +204,6 @@ async fn process_spans_and_events_batch(
         all_spans,
         spans_ingested_bytes,
         all_events,
-        project_ids,
         db,
         clickhouse,
         cache,
@@ -229,7 +226,6 @@ async fn process_batch(
     mut spans: Vec<Span>,
     spans_ingested_bytes: Vec<IngestedBytes>,
     events: Vec<Event>,
-    project_ids: Vec<Uuid>,
     db: Arc<DB>,
     clickhouse: clickhouse::Client,
     cache: Arc<Cache>,
@@ -383,7 +379,10 @@ async fn process_batch(
        .sum::<usize>()
        + total_events_ingested_bytes;
 
-    for project_id in project_ids {
+    // we get project id from the first span in the batch
+    // because all spans in the batch have the same project id
+    // batching is happening on the Otel SpanProcessor level
+    if let Some(project_id) = stripped_spans.first().map(|s| s.project_id) {
         if is_feature_enabled(Feature::UsageLimit) {
             if let Err(e) = update_workspace_limit_exceeded_by_project_id(
                 db.clone(),

app-server/src/traces/limits.rs

Lines changed: 92 additions & 80 deletions
@@ -3,21 +3,17 @@
 use std::sync::Arc;
 
 use anyhow::Result;
-use chrono::{DateTime, Utc};
 use uuid::Uuid;
 
 use crate::{
     cache::{
         Cache, CacheTrait,
-        keys::{PROJECT_CACHE_KEY, WORKSPACE_LIMITS_CACHE_KEY, WORKSPACE_PARTIAL_USAGE_CACHE_KEY},
+        keys::{PROJECT_CACHE_KEY, WORKSPACE_BYTES_USAGE_CACHE_KEY, WORKSPACE_LIMITS_CACHE_KEY},
     },
-    ch,
+    ch::limits::get_workspace_bytes_ingested_by_project_ids,
     db::{self, DB, projects::ProjectWithWorkspaceBillingInfo, stats::WorkspaceLimitsExceeded},
 };
 
-// Threshold in bytes (16MB) - only recompute workspace limits after this much data is written
-const RECOMPUTE_THRESHOLD_BYTES: usize = 16 * 1024 * 1024; // 16MB
-
 pub async fn get_workspace_limit_exceeded_by_project_id(
     db: Arc<DB>,
     clickhouse: clickhouse::Client,
@@ -39,13 +35,29 @@ pub async fn get_workspace_limit_exceeded_by_project_id(
     match cache_res {
         Ok(Some(workspace_limits_exceeded)) => Ok(workspace_limits_exceeded),
         Ok(None) | Err(_) => {
-            let workspace_limits_exceeded = is_workspace_over_limit(
-                clickhouse,
+            let bytes_ingested = match get_workspace_bytes_ingested_by_project_ids(
+                clickhouse.clone(),
                 project_info.workspace_project_ids,
-                project_info.bytes_limit,
                 project_info.reset_time,
             )
-            .await?;
+            .await
+            {
+                Ok(bytes_ingested) => bytes_ingested as i64,
+                Err(e) => {
+                    log::error!(
+                        "Failed to get workspace bytes ingested for project [{}]: {:?}",
+                        project_id,
+                        e
+                    );
+                    0 as i64
+                }
+            };
+
+            let workspace_limits_exceeded = WorkspaceLimitsExceeded {
+                steps: false,
+                bytes_ingested: bytes_ingested >= project_info.bytes_limit,
+            };
+
             let _ = cache
                 .insert::<WorkspaceLimitsExceeded>(&cache_key, workspace_limits_exceeded.clone())
                 .await;
@@ -62,72 +74,85 @@ pub async fn update_workspace_limit_exceeded_by_project_id(
     written_bytes: usize,
 ) -> Result<()> {
     tokio::spawn(async move {
-        let project_info = get_workspace_info_for_project_id(db.clone(), cache.clone(), project_id)
-            .await
-            .map_err(|e| {
-                log::error!(
-                    "Failed to get workspace info for project [{}]: {:?}",
-                    project_id,
-                    e
-                );
-            })
-            .unwrap();
+        let project_info =
+            match get_workspace_info_for_project_id(db.clone(), cache.clone(), project_id).await {
+                Ok(info) => info,
+                Err(e) => {
+                    log::error!(
+                        "Failed to get workspace info for project [{}]: {:?}",
+                        project_id,
+                        e
+                    );
+                    return;
+                }
+            };
         let workspace_id = project_info.workspace_id;
         if project_info.tier_name.trim().to_lowercase() != "free" {
             // We don't need to update the workspace limits cache for non-free tiers
             return;
         }
 
-        let partial_usage_cache_key = format!("{WORKSPACE_PARTIAL_USAGE_CACHE_KEY}:{workspace_id}");
+        let bytes_usage_cache_key = format!("{WORKSPACE_BYTES_USAGE_CACHE_KEY}:{workspace_id}");
         let limits_cache_key = format!("{WORKSPACE_LIMITS_CACHE_KEY}:{workspace_id}");
 
-        // Get current partial usage from cache
-        let cache_result = cache.get::<usize>(&partial_usage_cache_key).await;
-
-        // If cache is missing or errored, we should recompute
-        let (current_partial_usage, cache_available) = match cache_result {
-            Ok(Some(value)) => (value, true),
-            Ok(None) | Err(_) => (0, false),
-        };
-
-        let new_partial_usage = current_partial_usage + written_bytes;
-
-        // Recompute if: cache was unavailable, or we've accumulated at least RECOMPUTE_THRESHOLD_BYTES
-        let should_recompute = !cache_available || new_partial_usage >= RECOMPUTE_THRESHOLD_BYTES;
-
-        if should_recompute {
-            // Perform the heavy computation
-            let workspace_limits_exceeded = is_workspace_over_limit(
-                clickhouse,
-                project_info.workspace_project_ids,
-                project_info.bytes_limit,
-                project_info.reset_time,
-            )
-            .await
-            .map_err(|e| {
-                log::error!(
-                    "Failed to update workspace limit exceeded for project [{}]: {:?}",
-                    project_id,
-                    e
-                );
-            })
-            .unwrap();
-
-            // Update the limits cache
-            let _ = cache
-                .insert::<WorkspaceLimitsExceeded>(
-                    &limits_cache_key,
-                    workspace_limits_exceeded.clone(),
+        // First, try to read from cache to check if it exists
+        let cache_result = cache.get::<i64>(&bytes_usage_cache_key).await;
+
+        match cache_result {
+            Ok(Some(_)) => {
+                // Cache exists - atomically increment it
+                let increment_result = cache
+                    .increment(&bytes_usage_cache_key, written_bytes as i64)
+                    .await;
+
+                if let Ok(Some(new_partial_usage)) = increment_result {
+                    let workspace_limits_exceeded = WorkspaceLimitsExceeded {
+                        steps: false,
+                        bytes_ingested: new_partial_usage >= project_info.bytes_limit,
+                    };
+
+                    // Update the limits cache
+                    let _ = cache
+                        .insert::<WorkspaceLimitsExceeded>(
+                            &limits_cache_key,
+                            workspace_limits_exceeded,
+                        )
+                        .await;
+                }
+            }
+            Ok(None) | Err(_) => {
+                // Cache miss or error - perform full recomputation
+                let bytes_ingested = match get_workspace_bytes_ingested_by_project_ids(
+                    clickhouse.clone(),
+                    project_info.workspace_project_ids,
+                    project_info.reset_time,
                 )
-                .await;
-
-            // Reset the partial usage counter
-            let _ = cache.insert::<usize>(&partial_usage_cache_key, 0).await;
-        } else {
-            // Just update the partial usage counter
-            let _ = cache
-                .insert::<usize>(&partial_usage_cache_key, new_partial_usage)
-                .await;
+                .await
+                {
+                    Ok(bytes_ingested) => bytes_ingested as i64,
+                    Err(e) => {
+                        log::error!(
+                            "Failed to get workspace bytes ingested for project [{}]: {:?}",
+                            project_id,
+                            e
+                        );
+                        0 as i64
+                    }
+                };
+
+                let workspace_limits_exceeded = WorkspaceLimitsExceeded {
+                    steps: false,
+                    bytes_ingested: bytes_ingested >= project_info.bytes_limit,
+                };
+
+                let _ = cache
+                    .insert::<WorkspaceLimitsExceeded>(&limits_cache_key, workspace_limits_exceeded)
+                    .await;
+
+                let _ = cache
+                    .insert::<i64>(&bytes_usage_cache_key, bytes_ingested as i64)
+                    .await;
+            }
         }
     });
 
@@ -155,16 +180,3 @@ async fn get_workspace_info_for_project_id(
         }
     }
 }
-
-async fn is_workspace_over_limit(
-    clickhouse: clickhouse::Client,
-    project_ids: Vec<Uuid>,
-    bytes_limit: i64,
-    reset_time: DateTime<Utc>,
-) -> Result<WorkspaceLimitsExceeded> {
-    let workspace_limits_exceeded =
-        ch::limits::is_workspace_over_limit(clickhouse, project_ids, reset_time, bytes_limit)
-            .await?;
-
-    Ok(workspace_limits_exceeded)
-}
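A small worked example of the threshold check that both the increment path and the recompute path now share; the numbers and the 1 GiB limit are illustrative assumptions, not values from the commit:

// Hypothetical numbers: a free-tier limit of 1 GiB and a cached counter just below it.
let bytes_limit: i64 = 1024 * 1024 * 1024; // 1_073_741_824 bytes, illustrative
let cached_usage: i64 = 1_073_000_000;
let written_bytes: i64 = 800_000;

// After increment(), the new total crosses the limit, so the limits cache is
// updated with bytes_ingested = true without querying ClickHouse.
let new_partial_usage = cached_usage + written_bytes; // 1_073_800_000
assert!(new_partial_usage >= bytes_limit);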
