Skip to content

Commit 3906c5b

Browse files
committed
fix: stop returning expired in-memory cache hits
Signed-off-by: cryo <[email protected]>
1 parent 8d31616 commit 3906c5b

File tree

2 files changed

+51
-28
lines changed

2 files changed

+51
-28
lines changed

src/semantic-router/pkg/cache/cache_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,30 @@ development:
576576
Expect(stats.HitRatio).To(Equal(0.5))
577577
})
578578

579+
It("should skip expired entries during similarity search", func() {
580+
ttlCache := cache.NewInMemoryCache(cache.InMemoryCacheOptions{
581+
Enabled: true,
582+
SimilarityThreshold: 0.1,
583+
MaxEntries: 10,
584+
TTLSeconds: 1,
585+
})
586+
defer ttlCache.Close()
587+
588+
err := ttlCache.AddEntry("ttl-request-id", "ttl-model", "time-sensitive query", []byte("request"), []byte("response"))
589+
Expect(err).NotTo(HaveOccurred())
590+
591+
time.Sleep(1100 * time.Millisecond)
592+
593+
response, found, err := ttlCache.FindSimilar("ttl-model", "time-sensitive query")
594+
Expect(err).NotTo(HaveOccurred())
595+
Expect(found).To(BeFalse())
596+
Expect(response).To(BeNil())
597+
598+
stats := ttlCache.GetStats()
599+
Expect(stats.HitCount).To(Equal(int64(0)))
600+
Expect(stats.MissCount).To(Equal(int64(1)))
601+
})
602+
579603
It("should handle error when updating non-existent pending request", func() {
580604
err := inMemoryCache.UpdateWithResponse("non-existent-query", []byte("response"))
581605
Expect(err).To(HaveOccurred())

src/semantic-router/pkg/cache/inmemory_cache.go

Lines changed: 27 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -230,28 +230,34 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
230230
}
231231

232232
c.mu.RLock()
233-
234-
// Check for expired entries during search
235-
c.cleanupExpiredEntriesReadOnly()
236-
237233
var (
238234
bestIndex = -1
239235
bestEntry CacheEntry
240236
bestSimilarity float32
241237
entriesChecked int
238+
expiredCount int
242239
)
240+
// Capture the lookup time after acquiring the read lock so TTL checks aren't skewed by embedding work or time spent waiting for the lock
241+
now := time.Now()
243242

244243
// Compare with completed entries for the same model, tracking only the best match
245244
for entryIndex, entry := range c.entries {
245+
// Skip incomplete entries
246246
if entry.ResponseBody == nil {
247-
continue // Skip incomplete entries
247+
continue
248248
}
249249

250250
// Only consider entries for the same model
251251
if entry.Model != model {
252252
continue
253253
}
254254

255+
// Skip expired entries before computing their similarity
256+
if c.isExpired(entry, now) {
257+
expiredCount++
258+
continue
259+
}
260+
255261
// Compute semantic similarity using dot product
256262
var dotProduct float32
257263
for i := 0; i < len(queryEmbedding) && i < len(entry.Embedding); i++ {
@@ -272,6 +278,17 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
272278
// Unlock the read lock since we need the write lock to update the access info
273279
c.mu.RUnlock()
274280

281+
// Log if any expired entries were skipped
282+
if expiredCount > 0 {
283+
observability.Debugf("InMemoryCache: excluded %d expired entries during search (TTL: %ds)",
284+
expiredCount, c.ttlSeconds)
285+
observability.LogEvent("cache_expired_entries_found", map[string]interface{}{
286+
"backend": "memory",
287+
"expired_count": expiredCount,
288+
"ttl_seconds": c.ttlSeconds,
289+
})
290+
}
291+
275292
// Handle case where no suitable entries exist
276293
if bestIndex < 0 {
277294
atomic.AddInt64(&c.missCount, 1)
@@ -371,7 +388,7 @@ func (c *InMemoryCache) cleanupExpiredEntries() {
371388

372389
for _, entry := range c.entries {
373390
// Retain entries that are still within their TTL based on last access
374-
if now.Sub(entry.LastAccessAt).Seconds() < float64(c.ttlSeconds) {
391+
if !c.isExpired(entry, now) {
375392
validEntries = append(validEntries, entry)
376393
}
377394
}
@@ -397,31 +414,13 @@ func (c *InMemoryCache) cleanupExpiredEntries() {
397414
metrics.UpdateCacheEntries("memory", len(c.entries))
398415
}
399416

400-
// cleanupExpiredEntriesReadOnly identifies expired entries without modifying the cache
401-
// Used during read operations with only a read lock held
402-
func (c *InMemoryCache) cleanupExpiredEntriesReadOnly() {
417+
// isExpired reports whether the entry's TTL has elapsed since its last access; a non-positive TTL means entries never expire
418+
func (c *InMemoryCache) isExpired(entry CacheEntry, now time.Time) bool {
403419
if c.ttlSeconds <= 0 {
404-
return
405-
}
406-
407-
now := time.Now()
408-
expiredCount := 0
409-
410-
for _, entry := range c.entries {
411-
if now.Sub(entry.LastAccessAt).Seconds() >= float64(c.ttlSeconds) {
412-
expiredCount++
413-
}
420+
return false
414421
}
415422

416-
if expiredCount > 0 {
417-
observability.Debugf("InMemoryCache: found %d expired entries during read (TTL: %ds)",
418-
expiredCount, c.ttlSeconds)
419-
observability.LogEvent("cache_expired_entries_found", map[string]interface{}{
420-
"backend": "memory",
421-
"expired_count": expiredCount,
422-
"ttl_seconds": c.ttlSeconds,
423-
})
424-
}
423+
return now.Sub(entry.LastAccessAt) >= time.Duration(c.ttlSeconds)*time.Second
425424
}
426425

427426
// updateAccessInfo updates the access information for the given entry index

0 commit comments

Comments
 (0)