feat: add RedisVL as new semantic cache storage

rootfs · rootfs · commit e24c41c905fe · 2025-11-26T03:10:29.000Z
Signed-off-by: Huamin Chen <hchen@redhat.com>
diff --git a/examples/redis-cache-example.go b/examples/redis-cache-example.go
@@ -0,0 +1,129 @@
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
+)
+
+func main() {
+	// Example: Setting up Redis cache backend
+	fmt.Println("Redis Cache Backend Example")
+	fmt.Println("===========================")
+
+	// Configuration for Redis cache
+	config := cache.CacheConfig{
+		BackendType:         cache.RedisCacheType,
+		Enabled:             true,
+		SimilarityThreshold: 0.85,
+		TTLSeconds:          3600, // Entries expire after 1 hour
+		BackendConfigPath:   "config/semantic-cache/redis.yaml",
+	}
+
+	// Create cache backend
+	fmt.Println("\n1. Creating Redis cache backend...")
+	cacheBackend, err := cache.NewCacheBackend(config)
+	if err != nil {
+		log.Fatalf("Failed to create cache backend: %v", err)
+	}
+	defer cacheBackend.Close()
+
+	fmt.Println("✓ Redis cache backend created successfully")
+
+	// Example cache operations
+	model := "gpt-4"
+	query := "What is the capital of France?"
+	requestID := "req-12345"
+	requestBody := []byte(`{"model":"gpt-4","messages":[{"role":"user","content":"What is the capital of France?"}]}`)
+	responseBody := []byte(`{"choices":[{"message":{"content":"The capital of France is Paris."}}]}`)
+
+	// Add entry to cache
+	fmt.Println("\n2. Adding entry to cache...")
+	err = cacheBackend.AddEntry(requestID, model, query, requestBody, responseBody)
+	if err != nil {
+		log.Fatalf("Failed to add entry: %v", err)
+	}
+	fmt.Println("✓ Entry added to cache")
+
+	// Search for similar entry
+	fmt.Println("\n3. Searching for similar query...")
+	similarQuery := "What's the capital city of France?"
+	cachedResponse, found, err := cacheBackend.FindSimilar(model, similarQuery)
+	if err != nil {
+		log.Fatalf("Failed to search cache: %v", err)
+	}
+
+	if found {
+		fmt.Println("✓ Cache HIT! Found similar query")
+		fmt.Printf("  Cached response: %s\n", string(cachedResponse))
+	} else {
+		fmt.Println("✗ Cache MISS - no similar query found")
+	}
+
+	// Get cache statistics
+	fmt.Println("\n4. Cache Statistics:")
+	stats := cacheBackend.GetStats()
+	fmt.Printf("  Total Entries: %d\n", stats.TotalEntries)
+	fmt.Printf("  Hits: %d\n", stats.HitCount)
+	fmt.Printf("  Misses: %d\n", stats.MissCount)
+	fmt.Printf("  Hit Ratio: %.2f%%\n", stats.HitRatio*100)
+
+	// Example with custom threshold
+	fmt.Println("\n5. Searching with custom threshold...")
+	strictQuery := "Paris is the capital of which country?"
+	cachedResponse, found, err = cacheBackend.FindSimilarWithThreshold(model, strictQuery, 0.75)
+	if err != nil {
+		log.Fatalf("Failed to search cache: %v", err)
+	}
+
+	if found {
+		fmt.Println("✓ Cache HIT with threshold 0.75")
+		fmt.Printf("  Cached response: %s\n", string(cachedResponse))
+	} else {
+		fmt.Println("✗ Cache MISS with threshold 0.75")
+	}
+
+	// Example: Pending request workflow
+	fmt.Println("\n6. Pending Request Workflow:")
+	newRequestID := "req-67890"
+	newQuery := "What is machine learning?"
+	newRequestBody := []byte(`{"model":"gpt-4","messages":[{"role":"user","content":"What is machine learning?"}]}`)
+
+	fmt.Println("  Adding pending request...")
+	err = cacheBackend.AddPendingRequest(newRequestID, model, newQuery, newRequestBody)
+	if err != nil {
+		log.Fatalf("Failed to add pending request: %v", err)
+	}
+	fmt.Println("  ✓ Pending request added")
+
+	// Simulate getting response from LLM
+	newResponseBody := []byte(`{"choices":[{"message":{"content":"Machine learning is a subset of AI..."}}]}`)
+
+	fmt.Println("  Updating with response...")
+	err = cacheBackend.UpdateWithResponse(newRequestID, newResponseBody)
+	if err != nil {
+		log.Fatalf("Failed to update response: %v", err)
+	}
+	fmt.Println("  ✓ Response updated")
+
+	// Verify the entry is now cached
+	cachedResponse, found, err = cacheBackend.FindSimilar(model, newQuery)
+	if err != nil {
+		log.Fatalf("Failed to search cache: %v", err)
+	}
+
+	if found {
+		fmt.Println("  ✓ Entry is now in cache and searchable")
+	}
+
+	// Final statistics
+	fmt.Println("\n7. Final Statistics:")
+	stats = cacheBackend.GetStats()
+	fmt.Printf("  Total Entries: %d\n", stats.TotalEntries)
+	fmt.Printf("  Hits: %d\n", stats.HitCount)
+	fmt.Printf("  Misses: %d\n", stats.MissCount)
+	fmt.Printf("  Hit Ratio: %.2f%%\n", stats.HitRatio*100)
+
+	fmt.Println("\n✓ Example completed successfully!")
+}
diff --git a/src/semantic-router/go.mod b/src/semantic-router/go.mod
@@ -21,6 +21,7 @@ require (
 	github.com/openai/openai-go v1.12.0
 	github.com/prometheus/client_golang v1.23.0
 	github.com/prometheus/client_model v0.6.2
+	github.com/redis/go-redis/v9 v9.17.0
 	github.com/stretchr/testify v1.11.1
 	github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000
 	go.opentelemetry.io/otel v1.38.0
@@ -50,6 +51,7 @@ require (
 	github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect
 	github.com/cockroachdb/redact v1.1.3 // indirect
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
+	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
 	github.com/emicklei/go-restful/v3 v3.12.2 // indirect
 	github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
 	github.com/evanphx/json-patch/v5 v5.9.0 // indirect
diff --git a/src/semantic-router/go.sum b/src/semantic-router/go.sum
@@ -12,6 +12,10 @@ github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPn
 github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
+github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
+github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
+github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
 github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
 github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
 github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
@@ -43,6 +47,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4=
 github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
 github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM=
 github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
@@ -272,6 +278,8 @@ github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2
 github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
 github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
 github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
+github.com/redis/go-redis/v9 v9.17.0 h1:K6E+ZlYN95KSMmZeEQPbU/c++wfmEvfFB17yEAq/VhM=
+github.com/redis/go-redis/v9 v9.17.0/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370=
 github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
 github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o=
 github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
diff --git a/src/semantic-router/pkg/cache/cache_factory.go b/src/semantic-router/pkg/cache/cache_factory.go
@@ -51,6 +51,17 @@ func NewCacheBackend(config CacheConfig) (CacheBackend, error) {
 		}
 		return NewMilvusCache(options)
 
+	case RedisCacheType:
+		logging.Debugf("Creating Redis cache backend - ConfigPath: %s, TTL: %ds, Threshold: %.3f",
+			config.BackendConfigPath, config.TTLSeconds, config.SimilarityThreshold)
+		options := RedisCacheOptions{
+			Enabled:             config.Enabled,
+			SimilarityThreshold: config.SimilarityThreshold,
+			TTLSeconds:          config.TTLSeconds,
+			ConfigPath:          config.BackendConfigPath,
+		}
+		return NewRedisCache(options)
+
 	case HybridCacheType:
 		logging.Debugf("Creating Hybrid cache backend - MaxMemory: %d, TTL: %ds, Threshold: %.3f",
 			config.MaxMemoryEntries, config.TTLSeconds, config.SimilarityThreshold)
@@ -110,6 +121,16 @@ func ValidateCacheConfig(config CacheConfig) error {
 			return fmt.Errorf("milvus config file not found: %s", config.BackendConfigPath)
 		}
 		logging.Debugf("Milvus config file found: %s", config.BackendConfigPath)
+	case RedisCacheType:
+		if config.BackendConfigPath == "" {
+			return fmt.Errorf("backend_config_path is required for Redis cache backend")
+		}
+		// Ensure the Redis configuration file exists
+		if _, err := os.Stat(config.BackendConfigPath); os.IsNotExist(err) {
+			logging.Debugf("Redis config file not found: %s", config.BackendConfigPath)
+			return fmt.Errorf("redis config file not found: %s", config.BackendConfigPath)
+		}
+		logging.Debugf("Redis config file found: %s", config.BackendConfigPath)
 	}
 
 	return nil
@@ -162,5 +183,18 @@ func GetAvailableCacheBackends() []CacheBackendInfo {
 				"TTL support",
 			},
 		},
+		{
+			Type:        RedisCacheType,
+			Name:        "Redis Vector Database",
+			Description: "High-performance semantic cache powered by Redis with vector search",
+			Features: []string{
+				"Fast in-memory performance",
+				"Persistent storage with AOF/RDB",
+				"Scalable with Redis Cluster",
+				"HNSW and FLAT indexing",
+				"Wide ecosystem support",
+				"TTL support",
+			},
+		},
 	}
 }
diff --git a/src/semantic-router/pkg/cache/cache_interface.go b/src/semantic-router/pkg/cache/cache_interface.go
@@ -64,6 +64,9 @@ const (
 	// MilvusCacheType specifies the Milvus vector database backend
 	MilvusCacheType CacheBackendType = "milvus"
 
+	// RedisCacheType specifies the Redis vector database backend
+	RedisCacheType CacheBackendType = "redis"
+
 	// HybridCacheType specifies the hybrid HNSW + Milvus backend
 	HybridCacheType CacheBackendType = "hybrid"
 )
diff --git a/src/semantic-router/pkg/cache/redis_cache.go b/src/semantic-router/pkg/cache/redis_cache.go

Original file line number	Diff line number	Diff line change
`@@ -64,6 +64,9 @@ const (`
`64`	`64`	`// MilvusCacheType specifies the Milvus vector database backend`
`65`	`65`	`MilvusCacheType CacheBackendType = "milvus"`
`66`	`66`
	`67`	`+ // RedisCacheType specifies the Redis vector database backend`
	`68`	`+ RedisCacheType CacheBackendType = "redis"`
	`69`	`+`
`67`	`70`	`// HybridCacheType specifies the hybrid HNSW + Milvus backend`
`68`	`71`	`HybridCacheType CacheBackendType = "hybrid"`
`69`	`72`	`)`