Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/semantic-router/pkg/cache/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,30 @@ development:
Expect(stats.HitRatio).To(Equal(0.5))
})

It("should skip expired entries during similarity search", func() {
ttlCache := cache.NewInMemoryCache(cache.InMemoryCacheOptions{
Enabled: true,
SimilarityThreshold: 0.1,
MaxEntries: 10,
TTLSeconds: 1,
})
defer ttlCache.Close()

err := ttlCache.AddEntry("ttl-request-id", "ttl-model", "time-sensitive query", []byte("request"), []byte("response"))
Expect(err).NotTo(HaveOccurred())

time.Sleep(1100 * time.Millisecond)
Comment on lines +580 to +591
Copy link

Copilot AI Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The hardcoded sleep duration of 1100ms (1.1 seconds) is a magic number whose relationship to the 1-second TTL is only implicit. Consider defining it as a named constant such as `ttlWaitDuration = 1100 * time.Millisecond`, or deriving it from the TTL as `time.Duration(ttlSeconds)*time.Second + 100*time.Millisecond`, so that the relationship to the TTL is explicit.

Suggested change
ttlCache := cache.NewInMemoryCache(cache.InMemoryCacheOptions{
Enabled: true,
SimilarityThreshold: 0.1,
MaxEntries: 10,
TTLSeconds: 1,
})
defer ttlCache.Close()
err := ttlCache.AddEntry("ttl-request-id", "ttl-model", "time-sensitive query", []byte("request"), []byte("response"))
Expect(err).NotTo(HaveOccurred())
time.Sleep(1100 * time.Millisecond)
ttlSeconds := 1
ttlCache := cache.NewInMemoryCache(cache.InMemoryCacheOptions{
Enabled: true,
SimilarityThreshold: 0.1,
MaxEntries: 10,
TTLSeconds: ttlSeconds,
})
defer ttlCache.Close()
err := ttlCache.AddEntry("ttl-request-id", "ttl-model", "time-sensitive query", []byte("request"), []byte("response"))
Expect(err).NotTo(HaveOccurred())
ttlWaitDuration := time.Duration(ttlSeconds)*time.Second + 100*time.Millisecond
time.Sleep(ttlWaitDuration)

Copilot uses AI. Check for mistakes.


response, found, err := ttlCache.FindSimilar("ttl-model", "time-sensitive query")
Expect(err).NotTo(HaveOccurred())
Expect(found).To(BeFalse())
Expect(response).To(BeNil())

stats := ttlCache.GetStats()
Expect(stats.HitCount).To(Equal(int64(0)))
Expect(stats.MissCount).To(Equal(int64(1)))
})

It("should handle error when updating non-existent pending request", func() {
err := inMemoryCache.UpdateWithResponse("non-existent-query", []byte("response"))
Expect(err).To(HaveOccurred())
Expand Down
55 changes: 27 additions & 28 deletions src/semantic-router/pkg/cache/inmemory_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,28 +230,34 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
}

c.mu.RLock()

// Check for expired entries during search
c.cleanupExpiredEntriesReadOnly()

var (
bestIndex = -1
bestEntry CacheEntry
bestSimilarity float32
entriesChecked int
expiredCount int
)
// Capture the lookup time after acquiring the read lock so TTL checks aren’t skewed by embedding work or lock wait
now := time.Now()

// Compare with completed entries for the same model, tracking only the best match
for entryIndex, entry := range c.entries {
// Skip incomplete entries
if entry.ResponseBody == nil {
continue // Skip incomplete entries
continue
}

// Only consider entries for the same model
if entry.Model != model {
continue
}

// Skip entries that have expired before considering them
if c.isExpired(entry, now) {
expiredCount++
continue
}

// Compute semantic similarity using dot product
var dotProduct float32
for i := 0; i < len(queryEmbedding) && i < len(entry.Embedding); i++ {
Expand All @@ -272,6 +278,17 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
// Unlock the read lock since we need the write lock to update the access info
c.mu.RUnlock()

// Log if any expired entries were skipped
if expiredCount > 0 {
observability.Debugf("InMemoryCache: excluded %d expired entries during search (TTL: %ds)",
expiredCount, c.ttlSeconds)
observability.LogEvent("cache_expired_entries_found", map[string]interface{}{
"backend": "memory",
"expired_count": expiredCount,
"ttl_seconds": c.ttlSeconds,
})
}

// Handle case where no suitable entries exist
if bestIndex < 0 {
atomic.AddInt64(&c.missCount, 1)
Expand Down Expand Up @@ -371,7 +388,7 @@ func (c *InMemoryCache) cleanupExpiredEntries() {

for _, entry := range c.entries {
// Retain entries that are still within their TTL based on last access
if now.Sub(entry.LastAccessAt).Seconds() < float64(c.ttlSeconds) {
if !c.isExpired(entry, now) {
validEntries = append(validEntries, entry)
}
}
Expand All @@ -397,31 +414,13 @@ func (c *InMemoryCache) cleanupExpiredEntries() {
metrics.UpdateCacheEntries("memory", len(c.entries))
}

// cleanupExpiredEntriesReadOnly identifies expired entries without modifying the cache
// Used during read operations with only a read lock held
func (c *InMemoryCache) cleanupExpiredEntriesReadOnly() {
// isExpired checks if a cache entry has expired based on its last access time
func (c *InMemoryCache) isExpired(entry CacheEntry, now time.Time) bool {
if c.ttlSeconds <= 0 {
return
}

now := time.Now()
expiredCount := 0

for _, entry := range c.entries {
if now.Sub(entry.LastAccessAt).Seconds() >= float64(c.ttlSeconds) {
expiredCount++
}
return false
}

if expiredCount > 0 {
observability.Debugf("InMemoryCache: found %d expired entries during read (TTL: %ds)",
expiredCount, c.ttlSeconds)
observability.LogEvent("cache_expired_entries_found", map[string]interface{}{
"backend": "memory",
"expired_count": expiredCount,
"ttl_seconds": c.ttlSeconds,
})
}
return now.Sub(entry.LastAccessAt) >= time.Duration(c.ttlSeconds)*time.Second
Copy link

Copilot AI Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The conversion `time.Duration(c.ttlSeconds)*time.Second` is computed on every call to `isExpired`. Since `ttlSeconds` is constant for the cache instance, consider precomputing it as a `time.Duration` field (e.g., `ttlDuration`) during cache initialization to avoid the repeated multiplication.

Copilot uses AI. Check for mistakes.

}

// updateAccessInfo updates the access information for the given entry index
Expand Down
Loading