@@ -5,7 +5,6 @@ package cache
5
5
6
6
import (
7
7
"fmt"
8
- "sort"
9
8
"sync"
10
9
"sync/atomic"
11
10
"time"
@@ -235,14 +234,14 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
235
234
// Check for expired entries during search
236
235
c .cleanupExpiredEntriesReadOnly ()
237
236
238
- type SimilarityResult struct {
239
- EntryIndex int
240
- Entry CacheEntry
241
- Similarity float32
242
- }
237
+ var (
238
+ bestIndex = - 1
239
+ bestEntry CacheEntry
240
+ bestSimilarity float32
241
+ entriesChecked int
242
+ )
243
243
244
- // Compare with completed entries for the same model
245
- results := make ([]SimilarityResult , 0 , len (c .entries ))
244
+ // Compare with completed entries for the same model, tracking only the best match
246
245
for entryIndex , entry := range c .entries {
247
246
if entry .ResponseBody == nil {
248
247
continue // Skip incomplete entries
@@ -259,60 +258,59 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
259
258
dotProduct += queryEmbedding [i ] * entry .Embedding [i ]
260
259
}
261
260
262
- results = append (results , SimilarityResult {
263
- EntryIndex : entryIndex ,
264
- Entry : entry ,
265
- Similarity : dotProduct ,
266
- })
261
+ entriesChecked ++
262
+ if bestIndex == - 1 || dotProduct > bestSimilarity {
263
+ bestSimilarity = dotProduct
264
+ bestIndex = entryIndex
265
+ }
266
+ }
267
+ // Snapshot the best entry before releasing the read lock
268
+ if bestIndex >= 0 {
269
+ bestEntry = c .entries [bestIndex ]
267
270
}
268
271
269
- // unlock the read lock since we need the write lock to update the access info
272
+ // Unlock the read lock since we need the write lock to update the access info
270
273
c .mu .RUnlock ()
271
274
272
275
// Handle case where no suitable entries exist
273
- if len ( results ) == 0 {
276
+ if bestIndex < 0 {
274
277
atomic .AddInt64 (& c .missCount , 1 )
275
278
observability .Debugf ("InMemoryCache.FindSimilar: no entries found with responses" )
276
279
metrics .RecordCacheOperation ("memory" , "find_similar" , "miss" , time .Since (start ).Seconds ())
277
280
metrics .RecordCacheMiss ()
278
281
return nil , false , nil
279
282
}
280
283
281
- // Sort results by similarity score (highest first)
282
- sort .Slice (results , func (i , j int ) bool {
283
- return results [i ].Similarity > results [j ].Similarity
284
- })
285
-
286
284
// Check if the best match meets the similarity threshold
287
- if results [ 0 ]. Similarity >= c .similarityThreshold {
285
+ if bestSimilarity >= c .similarityThreshold {
288
286
atomic .AddInt64 (& c .hitCount , 1 )
289
287
290
288
c .mu .Lock ()
291
- c .updateAccessInfo (results [ 0 ]. EntryIndex , results [ 0 ]. Entry )
289
+ c .updateAccessInfo (bestIndex , bestEntry )
292
290
c .mu .Unlock ()
293
291
294
292
observability .Debugf ("InMemoryCache.FindSimilar: CACHE HIT - similarity=%.4f >= threshold=%.4f, response_size=%d bytes" ,
295
- results [ 0 ]. Similarity , c .similarityThreshold , len (results [ 0 ]. Entry .ResponseBody ))
293
+ bestSimilarity , c .similarityThreshold , len (bestEntry .ResponseBody ))
296
294
observability .LogEvent ("cache_hit" , map [string ]interface {}{
297
295
"backend" : "memory" ,
298
- "similarity" : results [ 0 ]. Similarity ,
296
+ "similarity" : bestSimilarity ,
299
297
"threshold" : c .similarityThreshold ,
300
298
"model" : model ,
301
299
})
302
300
metrics .RecordCacheOperation ("memory" , "find_similar" , "hit" , time .Since (start ).Seconds ())
303
301
metrics .RecordCacheHit ()
304
- return results [ 0 ]. Entry .ResponseBody , true , nil
302
+ return bestEntry .ResponseBody , true , nil
305
303
}
306
304
307
305
atomic .AddInt64 (& c .missCount , 1 )
308
306
observability .Debugf ("InMemoryCache.FindSimilar: CACHE MISS - best_similarity=%.4f < threshold=%.4f (checked %d entries)" ,
309
- results [ 0 ]. Similarity , c .similarityThreshold , len ( results ) )
307
+ bestSimilarity , c .similarityThreshold , entriesChecked )
310
308
observability .LogEvent ("cache_miss" , map [string ]interface {}{
311
309
"backend" : "memory" ,
312
- "best_similarity" : results [ 0 ]. Similarity ,
310
+ "best_similarity" : bestSimilarity ,
313
311
"threshold" : c .similarityThreshold ,
314
312
"model" : model ,
315
- "entries_checked" : len ( results ) ,
313
+ "entries_checked" : entriesChecked ,
316
314
})
317
315
metrics .RecordCacheOperation ("memory" , "find_similar" , "miss" , time .Since (start ).Seconds ())
318
316
metrics .RecordCacheMiss ()
0 commit comments