From 5e5f7c044d917a72795ac123a6f72a299718d6cb Mon Sep 17 00:00:00 2001
From: Ruchir Khaitan <ruchir.khaitan@datadoghq.com>
Date: Mon, 17 Nov 2025 13:09:57 -0500
Subject: [PATCH] fix(ttl_map): initialize time to 1 for correct wrapping sub

Fixes #221. Previously, we encountered a bug at the initial gc tick on
ttl_map where `time` (denoting the number of ticks since map initialization)
starts at 0, but the logic to compute the time to remove the inserted value is

```
free_time = (time - 1) // (number of ticks to ttl)
```

In other words, we maintain a circular buffer of inserts, with a single slot per tick,
and we will delete the item once `buffer.len()` ticks have elapsed (ttl = tick * buffer.len()).

This is all fine, except we implement the logic as:

```
free_time = time.wrapping_sub(1) / buffer.len()
```

which is computing for `u64` `time`:

```
free_time = ((time - 1) mod 2^64) mod buffer.len()
```

when we really just want `free_time = (time - 1) mod buffer.len()`.

Luckily this equality holds as long as ` 0 < time < 2^64`, as for those times `time - 1 mod 2&64 = time`.
This commit changes our behavior to initialize `time` at 1 instead of 0. We don't need to worry about
the overflow case because even if the tick duration was a nanosecond, it would take ~584 years to overflow
64 bits at which point we would surely have other problems besides the momentarily incorrect ttl. For
the underflow case, this should primarily help with unit tests above anything else, as the bug
only happened at `time = 0`.
---
 src/common/ttl_map.rs | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/common/ttl_map.rs b/src/common/ttl_map.rs
index 76c95266..2ccbc6fd 100644
--- a/src/common/ttl_map.rs
+++ b/src/common/ttl_map.rs
@@ -184,7 +184,8 @@ where
         Self {
             buckets,
             data: stage_targets,
-            time: Arc::new(AtomicU64::new(0)),
+            // Explicitly initialize `time` to 1 to avoid underflow issues with circular buffer.
+            time: Arc::new(AtomicU64::new(1)),
             gc_scheduler_task: None,
             config,
             #[cfg(test)]
@@ -446,6 +447,34 @@ mod tests {
         assert!(final_time < 100);
     }
 
+    #[tokio::test]
+    async fn test_initial_time() {
+        // Create a map with 7 buckets. 7 is chosen specifically as it
+        // has the property that (2^64 - 1) % 7 = 1, whereas (0 - 1) % 7 = 6.
+        let ttl_map = TTLMap::<String, i32>::_new(TTLMapConfig {
+            ttl: Duration::from_millis(70),
+            tick: Duration::from_millis(10),
+        });
+
+        ttl_map.get_or_init("test_key".to_string(), || 999);
+
+        // Advance GC 3 times, which shouldn't free the first key.
+        for _ in 0..3 {
+            TTLMap::<String, i32>::gc(ttl_map.time.clone(), &ttl_map.buckets);
+        }
+
+        tokio::time::sleep(Duration::from_millis(10)).await;
+        // Check that we still have our key. Have to wait before asserting to avoid the assertion
+        // being spuriously true.
+        assert_eq!(ttl_map.data.len(), 1);
+
+        // Run GC for 4 more steps, at which point the first key should be removed.
+        for _ in 0..4 {
+            TTLMap::<String, i32>::gc(ttl_map.time.clone(), &ttl_map.buckets);
+        }
+        assert_eventually(|| ttl_map.data.is_empty(), Duration::from_millis(100)).await;
+    }
+
     // Run with `cargo test bench_lock_contention --release -- --nocapture` to see output.
     #[tokio::test(flavor = "multi_thread", worker_threads = 16)]
     async fn bench_lock_contention() {