@@ -230,28 +230,34 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
230230 }
231231
232232 c .mu .RLock ()
233-
234- // Check for expired entries during search
235- c .cleanupExpiredEntriesReadOnly ()
236-
237233 var (
238234 bestIndex = - 1
239235 bestEntry CacheEntry
240236 bestSimilarity float32
241237 entriesChecked int
238+ expiredCount int
242239 )
240+ // Capture the lookup time after acquiring the read lock so TTL checks aren’t skewed by embedding work or lock wait
241+ now := time .Now ()
243242
244243 // Compare with completed entries for the same model, tracking only the best match
245244 for entryIndex , entry := range c .entries {
245+ // Skip incomplete entries
246246 if entry .ResponseBody == nil {
247- continue // Skip incomplete entries
247+ continue
248248 }
249249
250250 // Only consider entries for the same model
251251 if entry .Model != model {
252252 continue
253253 }
254254
255+ // Skip entries that have expired before considering them
256+ if c .isExpired (entry , now ) {
257+ expiredCount ++
258+ continue
259+ }
260+
255261 // Compute semantic similarity using dot product
256262 var dotProduct float32
257263 for i := 0 ; i < len (queryEmbedding ) && i < len (entry .Embedding ); i ++ {
@@ -272,6 +278,17 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
272278 // Unlock the read lock since we need the write lock to update the access info
273279 c .mu .RUnlock ()
274280
281+ // Log if any expired entries were skipped
282+ if expiredCount > 0 {
283+ observability .Debugf ("InMemoryCache: excluded %d expired entries during search (TTL: %ds)" ,
284+ expiredCount , c .ttlSeconds )
285+ observability .LogEvent ("cache_expired_entries_found" , map [string ]interface {}{
286+ "backend" : "memory" ,
287+ "expired_count" : expiredCount ,
288+ "ttl_seconds" : c .ttlSeconds ,
289+ })
290+ }
291+
275292 // Handle case where no suitable entries exist
276293 if bestIndex < 0 {
277294 atomic .AddInt64 (& c .missCount , 1 )
@@ -371,7 +388,7 @@ func (c *InMemoryCache) cleanupExpiredEntries() {
371388
372389 for _ , entry := range c .entries {
373390 // Retain entries that are still within their TTL based on last access
374- if now . Sub (entry . LastAccessAt ). Seconds () < float64 ( c . ttlSeconds ) {
391+ if ! c . isExpired (entry , now ) {
375392 validEntries = append (validEntries , entry )
376393 }
377394 }
@@ -397,31 +414,13 @@ func (c *InMemoryCache) cleanupExpiredEntries() {
397414 metrics .UpdateCacheEntries ("memory" , len (c .entries ))
398415}
399416
400- // cleanupExpiredEntriesReadOnly identifies expired entries without modifying the cache
401- // Used during read operations with only a read lock held
402- func (c * InMemoryCache ) cleanupExpiredEntriesReadOnly () {
417+ // isExpired checks if a cache entry has expired based on its last access time
418+ func (c * InMemoryCache ) isExpired (entry CacheEntry , now time.Time ) bool {
403419 if c .ttlSeconds <= 0 {
404- return
405- }
406-
407- now := time .Now ()
408- expiredCount := 0
409-
410- for _ , entry := range c .entries {
411- if now .Sub (entry .LastAccessAt ).Seconds () >= float64 (c .ttlSeconds ) {
412- expiredCount ++
413- }
420+ return false
414421 }
415422
416- if expiredCount > 0 {
417- observability .Debugf ("InMemoryCache: found %d expired entries during read (TTL: %ds)" ,
418- expiredCount , c .ttlSeconds )
419- observability .LogEvent ("cache_expired_entries_found" , map [string ]interface {}{
420- "backend" : "memory" ,
421- "expired_count" : expiredCount ,
422- "ttl_seconds" : c .ttlSeconds ,
423- })
424- }
423+ return now .Sub (entry .LastAccessAt ) >= time .Duration (c .ttlSeconds )* time .Second
425424}
426425
427426// updateAccessInfo updates the access information for the given entry index
0 commit comments