Skip to content

Commit 35a95f8

Browse files
authored
perf: optimize FindSimilar by tracking best match (#347)
Signed-off-by: cryo <[email protected]>
1 parent e54d751 commit 35a95f8

File tree

1 file changed

+26
-28
lines changed

1 file changed

+26
-28
lines changed

src/semantic-router/pkg/cache/inmemory_cache.go

Lines changed: 26 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ package cache
55

66
import (
77
"fmt"
8-
"sort"
98
"sync"
109
"sync/atomic"
1110
"time"
@@ -235,14 +234,14 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
235234
// Check for expired entries during search
236235
c.cleanupExpiredEntriesReadOnly()
237236

238-
type SimilarityResult struct {
239-
EntryIndex int
240-
Entry CacheEntry
241-
Similarity float32
242-
}
237+
var (
238+
bestIndex = -1
239+
bestEntry CacheEntry
240+
bestSimilarity float32
241+
entriesChecked int
242+
)
243243

244-
// Compare with completed entries for the same model
245-
results := make([]SimilarityResult, 0, len(c.entries))
244+
// Compare with completed entries for the same model, tracking only the best match
246245
for entryIndex, entry := range c.entries {
247246
if entry.ResponseBody == nil {
248247
continue // Skip incomplete entries
@@ -259,60 +258,59 @@ func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, e
259258
dotProduct += queryEmbedding[i] * entry.Embedding[i]
260259
}
261260

262-
results = append(results, SimilarityResult{
263-
EntryIndex: entryIndex,
264-
Entry: entry,
265-
Similarity: dotProduct,
266-
})
261+
entriesChecked++
262+
if bestIndex == -1 || dotProduct > bestSimilarity {
263+
bestSimilarity = dotProduct
264+
bestIndex = entryIndex
265+
}
266+
}
267+
// Snapshot the best entry before releasing the read lock
268+
if bestIndex >= 0 {
269+
bestEntry = c.entries[bestIndex]
267270
}
268271

269-
// unlock the read lock since we need the write lock to update the access info
272+
// Unlock the read lock since we need the write lock to update the access info
270273
c.mu.RUnlock()
271274

272275
// Handle case where no suitable entries exist
273-
if len(results) == 0 {
276+
if bestIndex < 0 {
274277
atomic.AddInt64(&c.missCount, 1)
275278
observability.Debugf("InMemoryCache.FindSimilar: no entries found with responses")
276279
metrics.RecordCacheOperation("memory", "find_similar", "miss", time.Since(start).Seconds())
277280
metrics.RecordCacheMiss()
278281
return nil, false, nil
279282
}
280283

281-
// Sort results by similarity score (highest first)
282-
sort.Slice(results, func(i, j int) bool {
283-
return results[i].Similarity > results[j].Similarity
284-
})
285-
286284
// Check if the best match meets the similarity threshold
287-
if results[0].Similarity >= c.similarityThreshold {
285+
if bestSimilarity >= c.similarityThreshold {
288286
atomic.AddInt64(&c.hitCount, 1)
289287

290288
c.mu.Lock()
291-
c.updateAccessInfo(results[0].EntryIndex, results[0].Entry)
289+
c.updateAccessInfo(bestIndex, bestEntry)
292290
c.mu.Unlock()
293291

294292
observability.Debugf("InMemoryCache.FindSimilar: CACHE HIT - similarity=%.4f >= threshold=%.4f, response_size=%d bytes",
295-
results[0].Similarity, c.similarityThreshold, len(results[0].Entry.ResponseBody))
293+
bestSimilarity, c.similarityThreshold, len(bestEntry.ResponseBody))
296294
observability.LogEvent("cache_hit", map[string]interface{}{
297295
"backend": "memory",
298-
"similarity": results[0].Similarity,
296+
"similarity": bestSimilarity,
299297
"threshold": c.similarityThreshold,
300298
"model": model,
301299
})
302300
metrics.RecordCacheOperation("memory", "find_similar", "hit", time.Since(start).Seconds())
303301
metrics.RecordCacheHit()
304-
return results[0].Entry.ResponseBody, true, nil
302+
return bestEntry.ResponseBody, true, nil
305303
}
306304

307305
atomic.AddInt64(&c.missCount, 1)
308306
observability.Debugf("InMemoryCache.FindSimilar: CACHE MISS - best_similarity=%.4f < threshold=%.4f (checked %d entries)",
309-
results[0].Similarity, c.similarityThreshold, len(results))
307+
bestSimilarity, c.similarityThreshold, entriesChecked)
310308
observability.LogEvent("cache_miss", map[string]interface{}{
311309
"backend": "memory",
312-
"best_similarity": results[0].Similarity,
310+
"best_similarity": bestSimilarity,
313311
"threshold": c.similarityThreshold,
314312
"model": model,
315-
"entries_checked": len(results),
313+
"entries_checked": entriesChecked,
316314
})
317315
metrics.RecordCacheOperation("memory", "find_similar", "miss", time.Since(start).Seconds())
318316
metrics.RecordCacheMiss()

0 commit comments

Comments
 (0)