Skip to content

Commit e24c41c

Browse files
committed
feat: add RedisVL as new semantic cache storage
Signed-off-by: Huamin Chen <[email protected]>
1 parent a7e8c11 commit e24c41c

File tree

6 files changed

+924
-0
lines changed

6 files changed

+924
-0
lines changed

examples/redis-cache-example.go

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"log"
6+
7+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
8+
)
9+
10+
func main() {
11+
// Example: Setting up Redis cache backend
12+
fmt.Println("Redis Cache Backend Example")
13+
fmt.Println("===========================")
14+
15+
// Configuration for Redis cache
16+
config := cache.CacheConfig{
17+
BackendType: cache.RedisCacheType,
18+
Enabled: true,
19+
SimilarityThreshold: 0.85,
20+
TTLSeconds: 3600, // Entries expire after 1 hour
21+
BackendConfigPath: "config/semantic-cache/redis.yaml",
22+
}
23+
24+
// Create cache backend
25+
fmt.Println("\n1. Creating Redis cache backend...")
26+
cacheBackend, err := cache.NewCacheBackend(config)
27+
if err != nil {
28+
log.Fatalf("Failed to create cache backend: %v", err)
29+
}
30+
defer cacheBackend.Close()
31+
32+
fmt.Println("✓ Redis cache backend created successfully")
33+
34+
// Example cache operations
35+
model := "gpt-4"
36+
query := "What is the capital of France?"
37+
requestID := "req-12345"
38+
requestBody := []byte(`{"model":"gpt-4","messages":[{"role":"user","content":"What is the capital of France?"}]}`)
39+
responseBody := []byte(`{"choices":[{"message":{"content":"The capital of France is Paris."}}]}`)
40+
41+
// Add entry to cache
42+
fmt.Println("\n2. Adding entry to cache...")
43+
err = cacheBackend.AddEntry(requestID, model, query, requestBody, responseBody)
44+
if err != nil {
45+
log.Fatalf("Failed to add entry: %v", err)
46+
}
47+
fmt.Println("✓ Entry added to cache")
48+
49+
// Search for similar entry
50+
fmt.Println("\n3. Searching for similar query...")
51+
similarQuery := "What's the capital city of France?"
52+
cachedResponse, found, err := cacheBackend.FindSimilar(model, similarQuery)
53+
if err != nil {
54+
log.Fatalf("Failed to search cache: %v", err)
55+
}
56+
57+
if found {
58+
fmt.Println("✓ Cache HIT! Found similar query")
59+
fmt.Printf(" Cached response: %s\n", string(cachedResponse))
60+
} else {
61+
fmt.Println("✗ Cache MISS - no similar query found")
62+
}
63+
64+
// Get cache statistics
65+
fmt.Println("\n4. Cache Statistics:")
66+
stats := cacheBackend.GetStats()
67+
fmt.Printf(" Total Entries: %d\n", stats.TotalEntries)
68+
fmt.Printf(" Hits: %d\n", stats.HitCount)
69+
fmt.Printf(" Misses: %d\n", stats.MissCount)
70+
fmt.Printf(" Hit Ratio: %.2f%%\n", stats.HitRatio*100)
71+
72+
// Example with custom threshold
73+
fmt.Println("\n5. Searching with custom threshold...")
74+
strictQuery := "Paris is the capital of which country?"
75+
cachedResponse, found, err = cacheBackend.FindSimilarWithThreshold(model, strictQuery, 0.75)
76+
if err != nil {
77+
log.Fatalf("Failed to search cache: %v", err)
78+
}
79+
80+
if found {
81+
fmt.Println("✓ Cache HIT with threshold 0.75")
82+
fmt.Printf(" Cached response: %s\n", string(cachedResponse))
83+
} else {
84+
fmt.Println("✗ Cache MISS with threshold 0.75")
85+
}
86+
87+
// Example: Pending request workflow
88+
fmt.Println("\n6. Pending Request Workflow:")
89+
newRequestID := "req-67890"
90+
newQuery := "What is machine learning?"
91+
newRequestBody := []byte(`{"model":"gpt-4","messages":[{"role":"user","content":"What is machine learning?"}]}`)
92+
93+
fmt.Println(" Adding pending request...")
94+
err = cacheBackend.AddPendingRequest(newRequestID, model, newQuery, newRequestBody)
95+
if err != nil {
96+
log.Fatalf("Failed to add pending request: %v", err)
97+
}
98+
fmt.Println(" ✓ Pending request added")
99+
100+
// Simulate getting response from LLM
101+
newResponseBody := []byte(`{"choices":[{"message":{"content":"Machine learning is a subset of AI..."}}]}`)
102+
103+
fmt.Println(" Updating with response...")
104+
err = cacheBackend.UpdateWithResponse(newRequestID, newResponseBody)
105+
if err != nil {
106+
log.Fatalf("Failed to update response: %v", err)
107+
}
108+
fmt.Println(" ✓ Response updated")
109+
110+
// Verify the entry is now cached
111+
cachedResponse, found, err = cacheBackend.FindSimilar(model, newQuery)
112+
if err != nil {
113+
log.Fatalf("Failed to search cache: %v", err)
114+
}
115+
116+
if found {
117+
fmt.Println(" ✓ Entry is now in cache and searchable")
118+
}
119+
120+
// Final statistics
121+
fmt.Println("\n7. Final Statistics:")
122+
stats = cacheBackend.GetStats()
123+
fmt.Printf(" Total Entries: %d\n", stats.TotalEntries)
124+
fmt.Printf(" Hits: %d\n", stats.HitCount)
125+
fmt.Printf(" Misses: %d\n", stats.MissCount)
126+
fmt.Printf(" Hit Ratio: %.2f%%\n", stats.HitRatio*100)
127+
128+
fmt.Println("\n✓ Example completed successfully!")
129+
}

src/semantic-router/go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ require (
2121
github.com/openai/openai-go v1.12.0
2222
github.com/prometheus/client_golang v1.23.0
2323
github.com/prometheus/client_model v0.6.2
24+
github.com/redis/go-redis/v9 v9.17.0
2425
github.com/stretchr/testify v1.11.1
2526
github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000
2627
go.opentelemetry.io/otel v1.38.0
@@ -50,6 +51,7 @@ require (
5051
github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect
5152
github.com/cockroachdb/redact v1.1.3 // indirect
5253
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
54+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
5355
github.com/emicklei/go-restful/v3 v3.12.2 // indirect
5456
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
5557
github.com/evanphx/json-patch/v5 v5.9.0 // indirect

src/semantic-router/go.sum

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPn
1212
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
1313
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
1414
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
15+
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
16+
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
17+
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
18+
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
1519
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
1620
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
1721
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
@@ -43,6 +47,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
4347
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4448
github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4=
4549
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
50+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
51+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
4652
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
4753
github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM=
4854
github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
@@ -272,6 +278,8 @@ github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2
272278
github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
273279
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
274280
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
281+
github.com/redis/go-redis/v9 v9.17.0 h1:K6E+ZlYN95KSMmZeEQPbU/c++wfmEvfFB17yEAq/VhM=
282+
github.com/redis/go-redis/v9 v9.17.0/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370=
275283
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
276284
github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o=
277285
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=

src/semantic-router/pkg/cache/cache_factory.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,17 @@ func NewCacheBackend(config CacheConfig) (CacheBackend, error) {
5151
}
5252
return NewMilvusCache(options)
5353

54+
case RedisCacheType:
55+
logging.Debugf("Creating Redis cache backend - ConfigPath: %s, TTL: %ds, Threshold: %.3f",
56+
config.BackendConfigPath, config.TTLSeconds, config.SimilarityThreshold)
57+
options := RedisCacheOptions{
58+
Enabled: config.Enabled,
59+
SimilarityThreshold: config.SimilarityThreshold,
60+
TTLSeconds: config.TTLSeconds,
61+
ConfigPath: config.BackendConfigPath,
62+
}
63+
return NewRedisCache(options)
64+
5465
case HybridCacheType:
5566
logging.Debugf("Creating Hybrid cache backend - MaxMemory: %d, TTL: %ds, Threshold: %.3f",
5667
config.MaxMemoryEntries, config.TTLSeconds, config.SimilarityThreshold)
@@ -110,6 +121,16 @@ func ValidateCacheConfig(config CacheConfig) error {
110121
return fmt.Errorf("milvus config file not found: %s", config.BackendConfigPath)
111122
}
112123
logging.Debugf("Milvus config file found: %s", config.BackendConfigPath)
124+
case RedisCacheType:
125+
if config.BackendConfigPath == "" {
126+
return fmt.Errorf("backend_config_path is required for Redis cache backend")
127+
}
128+
// Ensure the Redis configuration file exists
129+
if _, err := os.Stat(config.BackendConfigPath); os.IsNotExist(err) {
130+
logging.Debugf("Redis config file not found: %s", config.BackendConfigPath)
131+
return fmt.Errorf("redis config file not found: %s", config.BackendConfigPath)
132+
}
133+
logging.Debugf("Redis config file found: %s", config.BackendConfigPath)
113134
}
114135

115136
return nil
@@ -162,5 +183,18 @@ func GetAvailableCacheBackends() []CacheBackendInfo {
162183
"TTL support",
163184
},
164185
},
186+
{
187+
Type: RedisCacheType,
188+
Name: "Redis Vector Database",
189+
Description: "High-performance semantic cache powered by Redis with vector search",
190+
Features: []string{
191+
"Fast in-memory performance",
192+
"Persistent storage with AOF/RDB",
193+
"Scalable with Redis Cluster",
194+
"HNSW and FLAT indexing",
195+
"Wide ecosystem support",
196+
"TTL support",
197+
},
198+
},
165199
}
166200
}

src/semantic-router/pkg/cache/cache_interface.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ const (
6464
// MilvusCacheType specifies the Milvus vector database backend
6565
MilvusCacheType CacheBackendType = "milvus"
6666

67+
// RedisCacheType specifies the Redis vector database backend
68+
RedisCacheType CacheBackendType = "redis"
69+
6770
// HybridCacheType specifies the hybrid HNSW + Milvus backend
6871
HybridCacheType CacheBackendType = "hybrid"
6972
)

0 commit comments

Comments
 (0)