@@ -103,51 +103,86 @@ func (r *OpenAIRouter) makeRoutingDecision(classification *Classification) *Rout
103103
104104``` go
105105type SemanticCache struct {
106- embeddings map [ string ][] float32 // Query embeddings
107- responses map [ string ]CachedResponse
108- similarity SimilarityCalculator
109- ttl time. Duration
110- maxEntries int
111- mutex sync. RWMutex
106+ entries [] CacheEntry
107+ mu sync. RWMutex
108+ similarityThreshold float32
109+ maxEntries int
110+ ttlSeconds int
111+ enabled bool
112112}
113113
114- type CachedResponse struct {
115- Response interface {}
116- Timestamp time.Time
117- Model string
118- Embeddings []float32
119- HitCount int
114+ type CacheEntry struct {
115+ RequestBody []byte
116+ ResponseBody []byte
117+ Model string
118+ Query string
119+ Embedding []float32
120+ Timestamp time.Time
120121}
121122
122- // Cache lookup with semantic similarity
123- func (sc *SemanticCache ) Get (query string ) (interface {}, bool ) {
124- sc.mutex .RLock ()
125- defer sc.mutex .RUnlock ()
126-
127- // Generate query embedding
128- queryEmbedding := sc.generateEmbedding (query)
129-
130- // Find most similar cached query
131- bestSimilarity := 0.0
132- var bestMatch *CachedResponse
133-
134- for cachedQuery , embedding := range sc.embeddings {
135- similarity := sc.similarity .CosineSimilarity (queryEmbedding, embedding)
136-
137- if similarity > bestSimilarity && similarity > sc.similarityThreshold {
138- bestSimilarity = similarity
139- if response , exists := sc.responses [cachedQuery]; exists {
140- bestMatch = &response
141- }
123+ // FindSimilar looks for a similar request in the cache
124+ func (c *SemanticCache ) FindSimilar (model string , query string ) ([]byte , bool , error ) {
125+ if !c.enabled {
126+ return nil , false , nil
127+ }
128+
129+ // Generate embedding for the query
130+ queryEmbedding , err := candle_binding.GetEmbedding (query, 512 )
131+ if err != nil {
132+ return nil , false , fmt.Errorf (" failed to generate embedding: % w" , err)
133+ }
134+
135+ c.mu .RLock ()
136+ defer c.mu .RUnlock ()
137+
138+ // Cleanup expired entries
139+ c.cleanupExpiredEntriesReadOnly ()
140+
141+ type SimilarityResult struct {
142+ Entry CacheEntry
143+ Similarity float32
144+ }
145+
146+ // Only compare with entries that have responses
147+ results := make ([]SimilarityResult, 0 , len (c.entries ))
148+ for _ , entry := range c.entries {
149+ if entry.ResponseBody == nil {
150+ continue // Skip entries without responses
142151 }
152+
153+ // Only compare with entries with the same model
154+ if entry.Model != model {
155+ continue
156+ }
157+
158+ // Calculate similarity using dot product
159+ var dotProduct float32
160+ for i := 0 ; i < len (queryEmbedding) && i < len (entry.Embedding ); i++ {
161+ dotProduct += queryEmbedding[i] * entry.Embedding [i]
162+ }
163+
164+ results = append (results, SimilarityResult{
165+ Entry: entry,
166+ Similarity: dotProduct,
167+ })
143168 }
144-
145- if bestMatch != nil && time. Since (bestMatch. Timestamp ) < sc. ttl {
146- bestMatch. HitCount ++
147- return bestMatch. Response , true
169+
170+ // No results found
171+ if len (results) == 0 {
172+ return nil , false , nil
148173 }
149-
150- return nil , false
174+
175+ // Sort by similarity (highest first)
176+ sort.Slice (results, func (i, j int ) bool {
177+ return results[i].Similarity > results[j].Similarity
178+ })
179+
180+ // Check if the best match exceeds the threshold
181+ if results[0 ].Similarity >= c.similarityThreshold {
182+ return results[0 ].Entry .ResponseBody , true , nil
183+ }
184+
185+ return nil , false , nil
151186}
152187```
153188
0 commit comments