Skip to content

Commit 8d283ab

Browse files
committed
fix: cache implementation stale types
Signed-off-by: Augustinas Malinauskas <[email protected]>
1 parent 722221c commit 8d283ab

File tree

1 file changed

+73
-38
lines changed

1 file changed

+73
-38
lines changed

website/docs/architecture/router-implementation.md

Lines changed: 73 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -103,51 +103,86 @@ func (r *OpenAIRouter) makeRoutingDecision(classification *Classification) *Rout
103 103

104 104
```go
105 105
type SemanticCache struct {
106-
embeddings map[string][]float32 // Query embeddings
107-
responses map[string]CachedResponse
108-
similarity SimilarityCalculator
109-
ttl time.Duration
110-
maxEntries int
111-
mutex sync.RWMutex
106+
entries []CacheEntry
107+
mu sync.RWMutex
108+
similarityThreshold float32
109+
maxEntries int
110+
ttlSeconds int
111+
enabled bool
112 112
}
113 113

114-
type CachedResponse struct {
115-
Response interface{}
116-
Timestamp time.Time
117-
Model string
118-
Embeddings []float32
119-
HitCount int
114+
type CacheEntry struct {
115+
RequestBody []byte
116+
ResponseBody []byte
117+
Model string
118+
Query string
119+
Embedding []float32
120+
Timestamp time.Time
120 121
}
121 122

122-
// Cache lookup with semantic similarity
123-
func (sc *SemanticCache) Get(query string) (interface{}, bool) {
124-
sc.mutex.RLock()
125-
defer sc.mutex.RUnlock()
126-
127-
// Generate query embedding
128-
queryEmbedding := sc.generateEmbedding(query)
129-
130-
// Find most similar cached query
131-
bestSimilarity := 0.0
132-
var bestMatch *CachedResponse
133-
134-
for cachedQuery, embedding := range sc.embeddings {
135-
similarity := sc.similarity.CosineSimilarity(queryEmbedding, embedding)
136-
137-
if similarity > bestSimilarity && similarity > sc.similarityThreshold {
138-
bestSimilarity = similarity
139-
if response, exists := sc.responses[cachedQuery]; exists {
140-
bestMatch = &response
141-
}
123+
// FindSimilar looks for a similar request in the cache
124+
func (c *SemanticCache) FindSimilar(model string, query string) ([]byte, bool, error) {
125+
if !c.enabled {
126+
return nil, false, nil
127+
}
128+
129+
// Generate embedding for the query
130+
queryEmbedding, err := candle_binding.GetEmbedding(query, 512)
131+
if err != nil {
132+
return nil, false, fmt.Errorf("failed to generate embedding: %w", err)
133+
}
134+
135+
c.mu.RLock()
136+
defer c.mu.RUnlock()
137+
138+
// Cleanup expired entries
139+
c.cleanupExpiredEntriesReadOnly()
140+
141+
type SimilarityResult struct {
142+
Entry CacheEntry
143+
Similarity float32
144+
}
145+
146+
// Only compare with entries that have responses
147+
results := make([]SimilarityResult, 0, len(c.entries))
148+
for _, entry := range c.entries {
149+
if entry.ResponseBody == nil {
150+
continue // Skip entries without responses
142 151
}
152+
153+
// Only compare with entries with the same model
154+
if entry.Model != model {
155+
continue
156+
}
157+
158+
// Calculate similarity using dot product
159+
var dotProduct float32
160+
for i := 0; i < len(queryEmbedding) && i < len(entry.Embedding); i++ {
161+
dotProduct += queryEmbedding[i] * entry.Embedding[i]
162+
}
163+
164+
results = append(results, SimilarityResult{
165+
Entry: entry,
166+
Similarity: dotProduct,
167+
})
143 168
}
144-
145-
if bestMatch != nil && time.Since(bestMatch.Timestamp) < sc.ttl {
146-
bestMatch.HitCount++
147-
return bestMatch.Response, true
169+
170+
// No results found
171+
if len(results) == 0 {
172+
return nil, false, nil
148 173
}
149-
150-
return nil, false
174+
175+
// Sort by similarity (highest first)
176+
sort.Slice(results, func(i, j int) bool {
177+
return results[i].Similarity > results[j].Similarity
178+
})
179+
180+
// Check if the best match exceeds the threshold
181+
if results[0].Similarity >= c.similarityThreshold {
182+
return results[0].Entry.ResponseBody, true, nil
183+
}
184+
185+
return nil, false, nil
151 186
}
152 187
```
153 188

0 commit comments

Comments
 (0)