diff --git a/Makefile b/Makefile
index 9f3f4258..64b51766 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,66 @@
-.PHONY: all build clean test docker-build podman-build docker-run podman-run
+.PHONY: all build clean test docker-build podman-build docker-run podman-run start-milvus stop-milvus restart-milvus milvus-status clean-milvus test-milvus-cache test-semantic-router-milvus help
 
 # Default target
 all: build
 
+# Help target
+help:
+	@echo "Available targets:"
+	@echo " Build targets:"
+	@echo " all - Build everything (default)"
+	@echo " build - Build Rust library and Go router"
+	@echo " rust - Build only the Rust library"
+	@echo " build-router - Build only the Go router"
+	@echo " clean - Clean build artifacts"
+	@echo ""
+	@echo " Run targets:"
+	@echo " run-router - Run the router (CONFIG_FILE=config/config.yaml)"
+	@echo " run-envoy - Run Envoy proxy"
+	@echo ""
+	@echo " Test targets:"
+	@echo " test - Run all tests"
+	@echo " test-binding - Test candle-binding"
+	@echo " test-semantic-router - Test semantic router"
+	@echo " test-category-classifier - Test category classifier"
+	@echo " test-pii-classifier - Test PII classifier"
+	@echo " test-jailbreak-classifier - Test jailbreak classifier"
+	@echo ""
+	@echo " Milvus targets (CONTAINER_RUNTIME=docker|podman):"
+	@echo " start-milvus - Start Milvus container for testing"
+	@echo " stop-milvus - Stop and remove Milvus container"
+	@echo " restart-milvus - Restart Milvus container"
+	@echo " milvus-status - Check Milvus container status"
+	@echo " clean-milvus - Stop container and clean data"
+	@echo " test-milvus-cache - Test cache with Milvus backend"
+	@echo " test-semantic-router-milvus - Test router with Milvus cache"
+	@echo " Example: CONTAINER_RUNTIME=podman make start-milvus"
+	@echo ""
+	@echo " Demo targets:"
+	@echo " test-auto-prompt-reasoning - Test reasoning mode"
+	@echo " test-auto-prompt-no-reasoning - Test normal mode"
+	@echo " test-pii - Test PII detection"
+	@echo " test-prompt-guard - Test jailbreak detection"
+	@echo " test-tools - Test tool auto-selection"
+	@echo ""
+	@echo " Documentation targets:"
+	@echo " docs-dev - Start documentation dev server"
+	@echo " docs-build - Build documentation"
+	@echo " docs-serve - Serve built documentation"
+	@echo " docs-clean - Clean documentation artifacts"
+	@echo ""
+	@echo " Environment variables:"
+	@echo " CONTAINER_RUNTIME - Container runtime (docker|podman, default: docker)"
+	@echo " CONFIG_FILE - Config file path (default: config/config.yaml)"
+	@echo " VLLM_ENDPOINT - vLLM endpoint URL for testing"
+	@echo ""
+	@echo " Usage examples:"
+	@echo " make start-milvus # Use Docker (default)"
+	@echo " CONTAINER_RUNTIME=podman make start-milvus # Use Podman"
+	@echo " CONFIG_FILE=custom.yaml make run-router # Use custom config"
+
+# Container runtime (docker or podman)
+CONTAINER_RUNTIME ?= docker
+
 # vLLM env var
 VLLM_ENDPOINT ?=
 
@@ -30,7 +88,7 @@ rust:
 build-router: rust
 	@echo "Building router..."
 	@mkdir -p bin
-	@cd src/semantic-router && go build -o ../../bin/router cmd/main.go
+	@cd src/semantic-router && go build --tags=milvus -o ../../bin/router cmd/main.go
 
 # Config file path with default
 CONFIG_FILE ?= config/config.yaml
 
@@ -104,9 +162,12 @@ test-jailbreak-classifier: rust
 	cd src/training/prompt_guard_fine_tuning && CGO_ENABLED=1 go run jailbreak_classifier_verifier.go
 
 # Unit test semantic-router
+# By default, Milvus tests are skipped. To enable them, set SKIP_MILVUS_TESTS=false
+# Example: make test-semantic-router SKIP_MILVUS_TESTS=false
 test-semantic-router: build-router
 	@echo "Testing semantic-router..."
 	@export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release && \
+	export SKIP_MILVUS_TESTS=$${SKIP_MILVUS_TESTS:-true} && \
 	cd src/semantic-router && CGO_ENABLED=1 go test -v ./...
 
 # Test the Rust library and the Go binding
 
@@ -195,6 +256,65 @@ download-models:
 		hf download LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model --local-dir models/pii_classifier_modernbert-base_presidio_token_model; \
 	fi
 
+# Milvus container management
+start-milvus:
+	@echo "Starting Milvus container for testing with $(CONTAINER_RUNTIME)..."
+	@mkdir -p /tmp/milvus-data
+	@$(CONTAINER_RUNTIME) run -d \
+		--name milvus-semantic-cache \
+		--security-opt seccomp:unconfined \
+		-e ETCD_USE_EMBED=true \
+		-e ETCD_DATA_DIR=/var/lib/milvus/etcd \
+		-e ETCD_CONFIG_PATH=/milvus/configs/advanced/etcd.yaml \
+		-e COMMON_STORAGETYPE=local \
+		-e CLUSTER_ENABLED=false \
+		-p 19530:19530 \
+		-p 9091:9091 \
+		-v /tmp/milvus-data:/var/lib/milvus \
+		milvusdb/milvus:v2.3.3 \
+		milvus run standalone
+	@echo "Waiting for Milvus to be ready..."
+	@sleep 15
+	@echo "Milvus should be available at localhost:19530"
+
+stop-milvus:
+	@echo "Stopping Milvus container..."
+	@$(CONTAINER_RUNTIME) stop milvus-semantic-cache || true
+	@$(CONTAINER_RUNTIME) rm milvus-semantic-cache || true
+	@sudo rm -rf /tmp/milvus-data || true
+	@echo "Milvus container stopped and removed"
+
+restart-milvus: stop-milvus start-milvus
+
+milvus-status:
+	@echo "Checking Milvus container status..."
+	@if $(CONTAINER_RUNTIME) ps --filter "name=milvus-semantic-cache" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" | grep -q milvus-semantic-cache; then \
+		echo "Milvus container is running:"; \
+		$(CONTAINER_RUNTIME) ps --filter "name=milvus-semantic-cache" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"; \
+	else \
+		echo "Milvus container is not running"; \
+		echo "Run 'make start-milvus' to start it"; \
+	fi
+
+clean-milvus: stop-milvus
+	@echo "Cleaning up Milvus data..."
+	@sudo rm -rf milvus-data || rm -rf milvus-data
+	@echo "Milvus data directory cleaned"
+
+# Test semantic cache with Milvus backend
+test-milvus-cache: start-milvus rust
+	@echo "Testing semantic cache with Milvus backend..."
+	@export LD_LIBRARY_PATH=$${PWD}/candle-binding/target/release && \
+	cd src/semantic-router && CGO_ENABLED=1 go test -tags=milvus -v ./pkg/cache/
+	@echo "Consider running 'make stop-milvus' when done testing"
+
+# Test semantic-router with Milvus enabled
+test-semantic-router-milvus: build-router start-milvus
+	@echo "Testing semantic-router with Milvus cache backend..."
+	@export LD_LIBRARY_PATH=$${PWD}/candle-binding/target/release && \
+	cd src/semantic-router && CGO_ENABLED=1 go test -tags=milvus -v ./...
+	@echo "Consider running 'make stop-milvus' when done testing"
+
 # Documentation targets
 docs-install:
 	@echo "Installing documentation dependencies..."
diff --git a/config/cache/milvus.yaml b/config/cache/milvus.yaml
new file mode 100644
index 00000000..bac19b4d
--- /dev/null
+++ b/config/cache/milvus.yaml
@@ -0,0 +1,171 @@
+# Milvus Vector Database Configuration for Semantic Cache
+#
+# This configuration file contains settings for using Milvus as the semantic cache backend.
+# To use this configuration:
+# 1. Set backend_type: "milvus" in your main config.yaml
+# 2. Set backend_config_path: "config/cache/milvus.yaml" in your main config.yaml
+# 3. Ensure Milvus server is running and accessible
+# 4. Build with Milvus support: go build -tags=milvus
+
+# Milvus connection settings
+connection:
+  # Milvus server host (change for production deployment)
+  host: "localhost" # For production: use your Milvus cluster endpoint
+
+  # Milvus server port
+  port: 19530 # Standard Milvus port
+
+  # Database name (optional, defaults to "default")
+  database: "semantic_router_cache"
+
+  # Connection timeout in seconds
+  timeout: 30
+
+  # Authentication (enable for production)
+  auth:
+    enabled: false # Set to true for production
+    username: "" # Your Milvus username
+    password: "" # Your Milvus password
+
+  # TLS/SSL configuration (recommended for production)
+  tls:
+    enabled: false # Set to true for secure connections
+    cert_file: "" # Path to client certificate
+    key_file: "" # Path to client private key
+    ca_file: "" # Path to CA certificate
+
+# Collection settings
+collection:
+  # Name of the collection to store cache entries
+  name: "semantic_cache"
+
+  # Description of the collection
+  description: "Semantic cache for LLM request-response pairs"
+
+  # Vector field configuration
+  vector_field:
+    # Name of the vector field
+    name: "embedding"
+
+    # Dimension of the embeddings (auto-detected from model at runtime)
+    dimension: 384 # This value is ignored - dimension is auto-detected from the embedding model
+
+    # Metric type for similarity calculation
+    metric_type: "IP" # Inner Product (cosine similarity for normalized vectors)
+
+    # Index configuration for the vector field
+    index:
+      # Index type (HNSW is recommended for most use cases)
+      type: "HNSW"
+
+      # Index parameters
+      params:
+        M: 16 # Number of bi-directional links for each node
+        efConstruction: 64 # Search scope during index construction
+
+# Search configuration
+search:
+  # Search parameters
+  params:
+    ef: 64 # Search scope during search (should be >= topk)
+
+  # Number of top results to retrieve for similarity comparison
+  topk: 10
+
+  # Consistency level for search operations
+  consistency_level: "Session" # Options: Strong, Session, Bounded, Eventually
+
+# Performance and resource settings
+performance:
+  # Connection pool settings
+  connection_pool:
+    # Maximum number of connections in the pool
+    max_connections: 10
+
+    # Maximum idle connections
+    max_idle_connections: 5
+
+    # Connection timeout for acquiring from pool
+    acquire_timeout: 5
+
+  # Batch operation settings
+  batch:
+    # Maximum batch size for insert operations
+    insert_batch_size: 1000
+
+    # Batch timeout in seconds
+    timeout: 30
+
+# Data management
+data_management:
+  # Automatic data expiration (TTL) settings
+  ttl:
+    # Enable automatic TTL-based cleanup (requires TTL to be set in main config)
+    enabled: true
+
+    # Field name to store timestamp for TTL calculation
+    timestamp_field: "timestamp"
+
+    # Cleanup interval in seconds (how often to run cleanup)
+    cleanup_interval: 3600 # 1 hour
+
+  # Compaction settings
+  compaction:
+    # Enable automatic compaction
+    enabled: true
+
+    # Compaction interval in seconds
+    interval: 86400 # 24 hours
+
+# Logging and monitoring
+logging:
+  # Log level for Milvus client operations (debug, info, warn, error)
+  level: "info"
+
+  # Enable query/search logging for debugging
+  enable_query_log: false
+
+  # Enable performance metrics collection
+  enable_metrics: true
+
+# Development and debugging settings
+development:
+  # Drop collection on startup (WARNING: This will delete all cached data)
+  drop_collection_on_startup: true # Enable for development to test dynamic dimensions
+
+  # Create collection if it doesn't exist
+  auto_create_collection: true
+
+  # Print detailed error messages
+  verbose_errors: true
+
+# Example configurations for different environments:
+#
+# Local Development (Docker):
+#   connection:
+#     host: "localhost"
+#     port: 19530
+#     auth:
+#       enabled: false
+#   development:
+#     drop_collection_on_startup: true # Clean start for development
+#
+# Production (Zilliz Cloud):
+#   connection:
+#     host: "your-cluster-endpoint.zillizcloud.com"
+#     port: 443
+#     auth:
+#       enabled: true
+#       username: "your-username"
+#       password: "your-password"
+#     tls:
+#       enabled: true
+#   development:
+#     drop_collection_on_startup: false
+#     auto_create_collection: false # Pre-create collections in production
+#
+# Kubernetes Deployment:
+#   connection:
+#     host: "milvus-service.milvus-system.svc.cluster.local"
+#     port: 19530
+#     timeout: 60 # Longer timeout for cluster environments
diff --git a/config/config.yaml b/config/config.yaml
index 2f722822..32f585e7 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -4,9 +4,25 @@ bert_model:
   use_cpu: true
 semantic_cache:
   enabled: true
+  backend_type: "memory" # Options: "memory" or "milvus"
   similarity_threshold: 0.8
-  max_entries: 1000
+  max_entries: 1000 # Only applies to memory backend
   ttl_seconds: 3600
+
+  # For production environments, use Milvus for scalable caching:
+  # backend_type: "milvus"
+  # backend_config_path: "config/cache/milvus.yaml"
+
+  # Development/Testing: Use in-memory cache (current configuration)
+  # - Fast startup and no external dependencies
+  # - Limited to single instance scaling
+  # - Data lost on restart
+
+  # Production: Use Milvus vector database
+  # - Horizontally scalable and persistent
+  # - Supports distributed deployments
+  # - Requires Milvus cluster setup
+  # - To enable: uncomment the lines above and install Milvus dependencies
 tools:
   enabled: true # Set to true to enable automatic tool selection
   top_k: 3 # Number of most relevant tools to select
diff --git a/src/semantic-router/go.mod b/src/semantic-router/go.mod
index 3b6513b2..da335254 100644
--- a/src/semantic-router/go.mod
+++ b/src/semantic-router/go.mod
@@ -4,13 +4,17 @@ go 1.24.1
 
 replace (
 	github.com/vllm-project/semantic-router/candle-binding => ../../candle-binding
+	github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache => ./pkg/cache
 	github.com/vllm-project/semantic-router/src/semantic-router/pkg/config => ./pkg/config
 	github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc => ./pkg/extproc
+	github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics => ./pkg/metrics
+	github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability => ./pkg/observability
 )
 
 require (
 	github.com/envoyproxy/go-control-plane/envoy v1.32.4
 	github.com/fsnotify/fsnotify v1.7.0
+	github.com/milvus-io/milvus-sdk-go/v2 v2.4.2
 	github.com/onsi/ginkgo/v2 v2.23.4
 	github.com/onsi/gomega v1.38.0
 	github.com/openai/openai-go v1.12.0
@@ -25,12 +29,23 @@ require (
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 // indirect
+	github.com/cockroachdb/errors v1.9.1 // indirect
+	github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect
+	github.com/cockroachdb/redact v1.1.3 // indirect
 	github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
+	github.com/getsentry/sentry-go v0.12.0 // indirect
 	github.com/go-logr/logr v1.4.2 // indirect
 	github.com/go-task/slim-sprig/v3 v3.0.0 //
indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.65.0 // indirect @@ -43,6 +58,7 @@ require ( go.uber.org/automaxprocs v1.6.0 // indirect go.uber.org/multierr v1.10.0 // indirect golang.org/x/net v0.41.0 // indirect + golang.org/x/sync v0.15.0 // indirect golang.org/x/sys v0.33.0 // indirect golang.org/x/text v0.26.0 // indirect golang.org/x/tools v0.33.0 // indirect diff --git a/src/semantic-router/go.sum b/src/semantic-router/go.sum index 77570f5b..acb526d9 100644 --- a/src/semantic-router/go.sum +++ b/src/semantic-router/go.sum @@ -1,47 +1,212 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53/go.mod h1:+3IMCy2vIlbG1XG/0ggNQv0SvxCAIpPM5b1nCz56Xno= +github.com/CloudyKit/jet/v3 v3.0.0/go.mod h1:HKQPgSJmdK8hdoAbKUUWajkHyHo4RaU5rMdUywE7VMo= +github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY= +github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= +github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= +github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 h1:boJj011Hh+874zpIySeApCX4GeOjPl9qhRF3QuIZq+Q= github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= +github.com/cockroachdb/errors v1.9.1 h1:yFVvsI0VxmRShfawbt/laCIDy/mtTqqnvoNgiy5bEV8= +github.com/cockroachdb/errors v1.9.1/go.mod 
h1:2sxOtL2WIc096WSZqZ5h8fa17rdDq9HZOZLBCor4mBk= +github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f h1:6jduT9Hfc0njg5jJ1DdKCFPdMBrp/mdZfCpa5h+WM74= +github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= +github.com/cockroachdb/redact v1.1.3 h1:AKZds10rFSIj7qADf0g46UixK8NNLwWTNdCIGS5wfSQ= +github.com/cockroachdb/redact v1.1.3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= +github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM= +github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= +github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= +github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= +github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= +github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8= +github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc= +github.com/getsentry/sentry-go v0.12.0 h1:era7g0re5iY13bHSdN/xMkyV+5zZppjRVQhZrXCaEIk= +github.com/getsentry/sentry-go v0.12.0/go.mod 
h1:NSap0JBYWzHND8oMbyi0+XZhUalc1TBdRL1M71JZW2c= +github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s= +github.com/gin-gonic/gin v1.4.0/go.mod h1:OW2EZn3DO8Ln9oIKOvM++LBO+5UPHJJDH72/q/3rZdM= +github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= +github.com/go-errors/errors v1.0.1 h1:LUHzmkK3GUKUrL/1gfBUxAHzcev3apQlezX/+O7ma6w= +github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= +github.com/go-faker/faker/v4 v4.1.0 h1:ffuWmpDrducIUOO0QSKSF5Q2dxAht+dhsT9FvVHhPEI= +github.com/go-faker/faker/v4 v4.1.0/go.mod h1:uuNc0PSRxF8nMgjGrrrU4Nw5cF30Jc6Kd0/FUTTYbhg= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= +github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= +github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4= +github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/gogo/status v1.1.0/go.mod h1:BFv9nrluPLmrS0EmGVvLaPNmRosr9KapBYd5/hpY1WM= +github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod 
h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/hydrogen18/memlistener v0.0.0-20200120041712-dcc25e7acd91/go.mod h1:qEIFzExnS6016fRpRfxrExeVn2gbClQA99gQhnIcdhE= +github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/iris-contrib/blackfriday v2.0.0+incompatible/go.mod h1:UzZ2bDEoaSGPbkg6SAB4att1aAwTmVIx/5gCVqeyUdI= +github.com/iris-contrib/go.uuid v2.0.0+incompatible/go.mod h1:iz2lgM/1UnEf1kP0L/+fafWORmlnuysV2EMP8MW+qe0= +github.com/iris-contrib/jade v1.1.3/go.mod h1:H/geBymxJhShH5kecoiOCSssPX7QWYH7UaeZTSWddIk= +github.com/iris-contrib/pongo2 v0.0.1/go.mod h1:Ssh+00+3GAZqSQb30AvBRNxBx7rf0GqwkjqxNd0u65g= 
+github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= +github.com/kataras/golog v0.0.10/go.mod h1:yJ8YKCmyL+nWjERB90Qwn+bdyBZsaQwU3bTVFgkFIp8= +github.com/kataras/iris/v12 v12.1.8/go.mod h1:LMYy4VlP67TQ3Zgriz8RE2h2kMZV2SgMYbq3UhfoFmE= +github.com/kataras/neffos v0.0.14/go.mod h1:8lqADm8PnbeFfL7CLXh1WHw53dG27MC3pgi2R1rmoTE= +github.com/kataras/pio v0.0.2/go.mod h1:hAoW0t9UmXi4R5Oyq5Z4irTbaTsOemSrDGUtaTl7Dro= +github.com/kataras/sitemap v0.0.5/go.mod h1:KY2eugMKiPwsJgx7+U103YZehfvNGOXURubcGyk0Bz8= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= +github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y= +github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= +github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 
+github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/goveralls v0.0.2/go.mod h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw= +github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= +github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= +github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a h1:0B/8Fo66D8Aa23Il0yrQvg1KKz92tE/BJ5BvkUxxAAk= +github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek= +github.com/milvus-io/milvus-sdk-go/v2 v2.4.2 h1:Xqf+S7iicElwYoS2Zly8Nf/zKHuZsNy1xQajfdtygVY= +github.com/milvus-io/milvus-sdk-go/v2 v2.4.2/go.mod h1:ulO1YUXKH0PGg50q27grw048GDY9ayB4FPmh7D+FFTA= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= +github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= +github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= +github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY= github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o= github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0= github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= +github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 
h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -50,14 +215,41 @@ github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4 github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.5.0 
h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= @@ -70,6 +262,27 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= +github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= +github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w= +github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= +github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= +github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= +github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0/go.mod h1:/LWChgwKmvncFJFHJ7Gvn9wZArjbV5/FppcK2fKk/tI= +github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FBNExI05xg= +github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM= +github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/otel v1.34.0 
h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= @@ -82,30 +295,168 @@ go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net 
v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190327091125-710a502c58a2/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20211008194852-3b03d305991f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
+golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190327201419-c70d86f8b7cf/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools 
v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f h1:OxYkA3wjPsZyBylwymxSHa7ViiW1Sml4ToBrncvFehI= google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:+2Yz8+CLJbIfL9z73EW45avw8Lmge3xVElCP9zEKi50= +google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= +google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.71.1 h1:ffsFWr7ygTUscGPI0KKK6TLrGz0476KUvvsbqWK0rPI= google.golang.org/grpc v1.71.1/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec= +google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f h1:rqzndB2lIQGivcXdTuY3Y9NBvr70X+y77woofSRluec= +google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f/go.mod h1:gxndsbNG1n4TZcHGgsYEfVGnTxqfEdfiDv6/DADXX9o= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf 
v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= +gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= +gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/src/semantic-router/pkg/cache/cache.go b/src/semantic-router/pkg/cache/cache.go index 6dd4be2f..f2f3e814 100644 --- a/src/semantic-router/pkg/cache/cache.go +++ b/src/semantic-router/pkg/cache/cache.go @@ -3,304 +3,28 @@ package cache import ( "encoding/json" "fmt" - "log" - "sort" - "sync" - "time" - - candle_binding "github.com/vllm-project/semantic-router/candle-binding" ) -// CacheEntry represents a cached request-response pair -type CacheEntry struct { - RequestBody []byte - ResponseBody []byte - Model string - Query string - Embedding []float32 - Timestamp time.Time -} - -// SemanticCache implements a semantic cache using BERT embeddings -type SemanticCache struct 
{ - entries []CacheEntry - mu sync.RWMutex - similarityThreshold float32 - maxEntries int - ttlSeconds int - enabled bool -} - -// SemanticCacheOptions holds options for creating a new semantic cache -type SemanticCacheOptions struct { - SimilarityThreshold float32 - MaxEntries int - TTLSeconds int - Enabled bool -} - -// NewSemanticCache creates a new semantic cache with the given options -func NewSemanticCache(options SemanticCacheOptions) *SemanticCache { - return &SemanticCache{ - entries: []CacheEntry{}, - similarityThreshold: options.SimilarityThreshold, - maxEntries: options.MaxEntries, - ttlSeconds: options.TTLSeconds, - enabled: options.Enabled, - } -} - -// IsEnabled returns whether the cache is enabled -func (c *SemanticCache) IsEnabled() bool { - return c.enabled -} - -// AddPendingRequest adds a pending request to the cache (without response yet) -func (c *SemanticCache) AddPendingRequest(model string, query string, requestBody []byte) (string, error) { - if !c.enabled { - return query, nil - } - - // Generate embedding for the query - embedding, err := candle_binding.GetEmbedding(query, 512) - if err != nil { - return "", fmt.Errorf("failed to generate embedding: %w", err) - } - - c.mu.Lock() - defer c.mu.Unlock() - - // Cleanup expired entries if TTL is set - c.cleanupExpiredEntries() - - // Create a new entry with the pending request - entry := CacheEntry{ - RequestBody: requestBody, - Model: model, - Query: query, - Embedding: embedding, - Timestamp: time.Now(), - } - - c.entries = append(c.entries, entry) - // log.Printf("Added pending cache entry for: %s", query) - - // Enforce max entries limit if set - if c.maxEntries > 0 && len(c.entries) > c.maxEntries { - // Sort by timestamp (oldest first) - sort.Slice(c.entries, func(i, j int) bool { - return c.entries[i].Timestamp.Before(c.entries[j].Timestamp) - }) - // Remove oldest entries - c.entries = c.entries[len(c.entries)-c.maxEntries:] - log.Printf("Trimmed cache to %d entries", c.maxEntries) - } - - return query, nil -} - -// UpdateWithResponse updates a pending request with its response -func (c *SemanticCache) UpdateWithResponse(query string, responseBody []byte) error { - if !c.enabled { - return nil - } - - c.mu.Lock() - defer c.mu.Unlock() - - // Cleanup expired entries while we have the write lock - c.cleanupExpiredEntries() - - // Find the pending request by query - for i, entry := range c.entries { - if entry.Query == query && entry.ResponseBody == nil { - // Update with response - c.entries[i].ResponseBody = responseBody - c.entries[i].Timestamp = time.Now() - // log.Printf("Cache entry updated: %s", query) - return nil - } - } - - return fmt.Errorf("no pending request found for query: %s", query) -} - -// AddEntry adds a complete entry to the cache -func (c *SemanticCache) AddEntry(model string, query string, requestBody, responseBody []byte) error { - if !c.enabled { - return nil - } - - // Generate embedding for the query - embedding, err := candle_binding.GetEmbedding(query, 512) - if err != nil { - return fmt.Errorf("failed to generate embedding: %w", err) - } - - entry := CacheEntry{ - RequestBody: requestBody, - ResponseBody: responseBody, - Model: model, - Query: query, - Embedding: embedding, - Timestamp: time.Now(), - } - - c.mu.Lock() - defer c.mu.Unlock() - - // Cleanup expired entries - c.cleanupExpiredEntries() - - c.entries = append(c.entries, entry) - log.Printf("Added cache entry: %s", query) - - // Enforce max entries limit - if c.maxEntries > 0 && len(c.entries) > c.maxEntries { - // Sort by 
timestamp (oldest first) - sort.Slice(c.entries, func(i, j int) bool { - return c.entries[i].Timestamp.Before(c.entries[j].Timestamp) - }) - // Remove oldest entries - c.entries = c.entries[len(c.entries)-c.maxEntries:] - } - - return nil -} - -// FindSimilar looks for a similar request in the cache -func (c *SemanticCache) FindSimilar(model string, query string) ([]byte, bool, error) { - if !c.enabled { - return nil, false, nil - } - - // Generate embedding for the query - queryEmbedding, err := candle_binding.GetEmbedding(query, 512) - if err != nil { - return nil, false, fmt.Errorf("failed to generate embedding: %w", err) - } - - c.mu.RLock() - defer c.mu.RUnlock() - - // Cleanup expired entries - c.cleanupExpiredEntriesReadOnly() - - type SimilarityResult struct { - Entry CacheEntry - Similarity float32 - } - - // Only compare with entries that have responses - results := make([]SimilarityResult, 0, len(c.entries)) - for _, entry := range c.entries { - if entry.ResponseBody == nil { - continue // Skip entries without responses - } - - // Only compare with entries with the same model - if entry.Model != model { - continue - } - - // Calculate similarity - var dotProduct float32 - for i := 0; i < len(queryEmbedding) && i < len(entry.Embedding); i++ { - dotProduct += queryEmbedding[i] * entry.Embedding[i] - } - - results = append(results, SimilarityResult{ - Entry: entry, - Similarity: dotProduct, - }) - } - - // No results found - if len(results) == 0 { - return nil, false, nil - } - - // Sort by similarity (highest first) - sort.Slice(results, func(i, j int) bool { - return results[i].Similarity > results[j].Similarity - }) - - // Check if the best match exceeds the threshold - if results[0].Similarity >= c.similarityThreshold { - log.Printf("Cache hit: similarity=%.4f, threshold=%.4f", - results[0].Similarity, c.similarityThreshold) - return results[0].Entry.ResponseBody, true, nil - } - - log.Printf("Cache miss: best similarity=%.4f, threshold=%.4f", - results[0].Similarity, c.similarityThreshold) - return nil, false, nil -} - -// cleanupExpiredEntries removes expired entries from the cache -// Assumes the caller holds a write lock -func (c *SemanticCache) cleanupExpiredEntries() { - if c.ttlSeconds <= 0 { - return - } - - now := time.Now() - validEntries := make([]CacheEntry, 0, len(c.entries)) - - for _, entry := range c.entries { - // Keep entries that haven't expired - if now.Sub(entry.Timestamp).Seconds() < float64(c.ttlSeconds) { - validEntries = append(validEntries, entry) - } - } - - if len(validEntries) < len(c.entries) { - log.Printf("Removed %d expired cache entries", len(c.entries)-len(validEntries)) - c.entries = validEntries - } -} - -// cleanupExpiredEntriesReadOnly checks for expired entries but doesn't modify the cache -// Used during read operations where we only have a read lock -func (c *SemanticCache) cleanupExpiredEntriesReadOnly() { - if c.ttlSeconds <= 0 { - return - } - - now := time.Now() - expiredCount := 0 - - for _, entry := range c.entries { - if now.Sub(entry.Timestamp).Seconds() >= float64(c.ttlSeconds) { - expiredCount++ - } - } - - if expiredCount > 0 { - log.Printf("Found %d expired cache entries during read operation", expiredCount) - } -} - -// ChatMessage represents a message in the OpenAI chat format +// ChatMessage represents a message in the OpenAI chat format with role and content type ChatMessage struct { Role string `json:"role"` Content string `json:"content"` } -// OpenAIRequest represents an OpenAI API request +// OpenAIRequest represents 
the structure of an OpenAI API request type OpenAIRequest struct { Model string `json:"model"` Messages []ChatMessage `json:"messages"` } -// ExtractQueryFromOpenAIRequest extracts the user query from an OpenAI request +// ExtractQueryFromOpenAIRequest parses an OpenAI request and extracts the user query func ExtractQueryFromOpenAIRequest(requestBody []byte) (string, string, error) { var req OpenAIRequest if err := json.Unmarshal(requestBody, &req); err != nil { return "", "", fmt.Errorf("invalid request body: %w", err) } - // Extract user messages + // Find user messages in the conversation var userMessages []string for _, msg := range req.Messages { if msg.Role == "user" { @@ -308,10 +32,10 @@ func ExtractQueryFromOpenAIRequest(requestBody []byte) (string, string, error) { } } - // Join all user messages + // Use the most recent user message as the query query := "" if len(userMessages) > 0 { - query = userMessages[len(userMessages)-1] // Use the last user message + query = userMessages[len(userMessages)-1] } return req.Model, query, nil diff --git a/src/semantic-router/pkg/cache/cache_factory.go b/src/semantic-router/pkg/cache/cache_factory.go new file mode 100644 index 00000000..396d1eb6 --- /dev/null +++ b/src/semantic-router/pkg/cache/cache_factory.go @@ -0,0 +1,143 @@ +package cache + +import ( + "fmt" + "os" + + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" +) + +// NewCacheBackend creates a cache backend instance from the provided configuration +func NewCacheBackend(config CacheConfig) (CacheBackend, error) { + if !config.Enabled { + // Create a disabled cache backend + observability.Debugf("Cache disabled - creating disabled in-memory cache backend") + return NewInMemoryCache(InMemoryCacheOptions{ + Enabled: false, + }), nil + } + + switch config.BackendType { + case InMemoryCacheType, "": + // Use in-memory cache as the default backend + observability.Debugf("Creating in-memory cache backend - MaxEntries: %d, TTL: %ds, Threshold: %.3f", + config.MaxEntries, config.TTLSeconds, config.SimilarityThreshold) + options := InMemoryCacheOptions{ + Enabled: config.Enabled, + SimilarityThreshold: config.SimilarityThreshold, + MaxEntries: config.MaxEntries, + TTLSeconds: config.TTLSeconds, + } + return NewInMemoryCache(options), nil + + case MilvusCacheType: + observability.Debugf("Creating Milvus cache backend - ConfigPath: %s, TTL: %ds, Threshold: %.3f", + config.BackendConfigPath, config.TTLSeconds, config.SimilarityThreshold) + if config.BackendConfigPath == "" { + return nil, fmt.Errorf("backend_config_path is required for Milvus cache backend") + } + + // Ensure the Milvus configuration file exists + if _, err := os.Stat(config.BackendConfigPath); os.IsNotExist(err) { + observability.Debugf("Milvus config file not found: %s", config.BackendConfigPath) + return nil, fmt.Errorf("Milvus config file not found: %s", config.BackendConfigPath) + } + observability.Debugf("Milvus config file found: %s", config.BackendConfigPath) + + options := MilvusCacheOptions{ + Enabled: config.Enabled, + SimilarityThreshold: config.SimilarityThreshold, + TTLSeconds: config.TTLSeconds, + ConfigPath: config.BackendConfigPath, + } + return NewMilvusCache(options) + + default: + observability.Debugf("Unsupported cache backend type: %s", config.BackendType) + return nil, fmt.Errorf("unsupported cache backend type: %s", config.BackendType) + } +} + +// ValidateCacheConfig validates cache configuration parameters +func ValidateCacheConfig(config CacheConfig) error { + if 
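
// Illustrative sketch, not part of this change: how a caller could combine
// ExtractQueryFromOpenAIRequest (defined above in cache.go) with a CacheBackend
// lookup. Assumes package cache; lookupCachedResponse is a hypothetical helper name.
func lookupCachedResponse(backend CacheBackend, requestBody []byte) ([]byte, bool, error) {
	model, query, err := ExtractQueryFromOpenAIRequest(requestBody)
	if err != nil {
		return nil, false, err
	}
	if query == "" {
		// Nothing to key the cache on when the request carries no user message.
		return nil, false, nil
	}
	return backend.FindSimilar(model, query)
}
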
!config.Enabled { + return nil // Skip validation for disabled cache + } + + // Check similarity threshold range + if config.SimilarityThreshold < 0.0 || config.SimilarityThreshold > 1.0 { + return fmt.Errorf("similarity_threshold must be between 0.0 and 1.0, got: %f", config.SimilarityThreshold) + } + + // Check TTL value + if config.TTLSeconds < 0 { + return fmt.Errorf("ttl_seconds cannot be negative, got: %d", config.TTLSeconds) + } + + // Check max entries for in-memory cache + if config.BackendType == InMemoryCacheType || config.BackendType == "" { + if config.MaxEntries < 0 { + return fmt.Errorf("max_entries cannot be negative for in-memory cache, got: %d", config.MaxEntries) + } + } + + // Check backend-specific requirements + switch config.BackendType { + case MilvusCacheType: + if config.BackendConfigPath == "" { + return fmt.Errorf("backend_config_path is required for Milvus cache backend") + } + } + + return nil +} + +// GetDefaultCacheConfig provides sensible default cache configuration values +func GetDefaultCacheConfig() CacheConfig { + return CacheConfig{ + BackendType: InMemoryCacheType, + Enabled: true, + SimilarityThreshold: 0.8, + MaxEntries: 1000, + TTLSeconds: 3600, + } +} + +// CacheBackendInfo describes the capabilities and features of a cache backend +type CacheBackendInfo struct { + Type CacheBackendType `json:"type"` + Name string `json:"name"` + Description string `json:"description"` + Features []string `json:"features"` +} + +// GetAvailableCacheBackends returns metadata for all supported cache backends +func GetAvailableCacheBackends() []CacheBackendInfo { + return []CacheBackendInfo{ + { + Type: InMemoryCacheType, + Name: "In-Memory Cache", + Description: "High-performance in-memory semantic cache with BERT embeddings", + Features: []string{ + "Fast access", + "No external dependencies", + "Automatic memory management", + "TTL support", + "Entry limit support", + }, + }, + { + Type: MilvusCacheType, + Name: "Milvus Vector Database", + Description: "Enterprise-grade semantic cache powered by Milvus vector database", + Features: []string{ + "Highly scalable", + "Persistent storage", + "Distributed architecture", + "Advanced indexing", + "High availability", + "TTL support", + }, + }, + } +} diff --git a/src/semantic-router/pkg/cache/cache_interface.go b/src/semantic-router/pkg/cache/cache_interface.go new file mode 100644 index 00000000..b1940f10 --- /dev/null +++ b/src/semantic-router/pkg/cache/cache_interface.go @@ -0,0 +1,80 @@ +package cache + +import "time" + +// CacheEntry represents a complete cached request-response pair with associated metadata +type CacheEntry struct { + RequestBody []byte + ResponseBody []byte + Model string + Query string + Embedding []float32 + Timestamp time.Time +} + +// CacheBackend defines the interface for semantic cache implementations +type CacheBackend interface { + // IsEnabled returns whether caching is currently active + IsEnabled() bool + + // AddPendingRequest stores a request awaiting its response + // Returns the processed query string and any error + AddPendingRequest(model string, query string, requestBody []byte) (string, error) + + // UpdateWithResponse completes a pending request with the received response + UpdateWithResponse(query string, responseBody []byte) error + + // AddEntry stores a complete request-response pair in the cache + AddEntry(model string, query string, requestBody, responseBody []byte) error + + // FindSimilar searches for semantically similar cached requests + // Returns the cached 
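
// Illustrative sketch, not part of this change: validating a configuration and
// constructing a backend through the factory above. Assumes package cache, where
// fmt is already imported; newBackendFromConfig is a hypothetical helper name.
func newBackendFromConfig(cfg CacheConfig) (CacheBackend, error) {
	// Reject malformed settings (threshold outside [0.0, 1.0], negative TTL or
	// entry limits, missing Milvus config path) before any backend is created.
	if err := ValidateCacheConfig(cfg); err != nil {
		return nil, fmt.Errorf("invalid cache config: %w", err)
	}
	return NewCacheBackend(cfg)
}

// A possible call site (values illustrative): backend, err := newBackendFromConfig(GetDefaultCacheConfig());
// callers are expected to defer backend.Close() once the backend is no longer needed.
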
response, match status, and any error + FindSimilar(model string, query string) ([]byte, bool, error) + + // Close releases all resources held by the cache backend + Close() error + + // GetStats provides cache performance and usage metrics + GetStats() CacheStats +} + +// CacheStats holds performance metrics and usage statistics for cache operations +type CacheStats struct { + TotalEntries int `json:"total_entries"` + HitCount int64 `json:"hit_count"` + MissCount int64 `json:"miss_count"` + HitRatio float64 `json:"hit_ratio"` + LastCleanupTime *time.Time `json:"last_cleanup_time,omitempty"` +} + +// CacheBackendType defines the available cache backend implementations +type CacheBackendType string + +const ( + // InMemoryCacheType specifies the in-memory cache backend + InMemoryCacheType CacheBackendType = "memory" + + // MilvusCacheType specifies the Milvus vector database backend + MilvusCacheType CacheBackendType = "milvus" +) + +// CacheConfig contains configuration settings shared across all cache backends +type CacheConfig struct { + // BackendType specifies which cache implementation to use + BackendType CacheBackendType `yaml:"backend_type"` + + // Enabled controls whether semantic caching is active + Enabled bool `yaml:"enabled"` + + // SimilarityThreshold defines the minimum similarity score for cache hits (0.0-1.0) + SimilarityThreshold float32 `yaml:"similarity_threshold"` + + // MaxEntries limits the number of cached entries (for in-memory backend) + MaxEntries int `yaml:"max_entries,omitempty"` + + // TTLSeconds sets cache entry expiration time (0 disables expiration) + TTLSeconds int `yaml:"ttl_seconds,omitempty"` + + // BackendConfigPath points to backend-specific configuration files + BackendConfigPath string `yaml:"backend_config_path,omitempty"` +} diff --git a/src/semantic-router/pkg/cache/cache_test.go b/src/semantic-router/pkg/cache/cache_test.go index d4787b47..31b9379f 100644 --- a/src/semantic-router/pkg/cache/cache_test.go +++ b/src/semantic-router/pkg/cache/cache_test.go @@ -1,17 +1,16 @@ package cache_test import ( - "encoding/json" - "fmt" - "sync" + "os" + "path/filepath" + "strings" "testing" - "time" + + candle_binding "github.com/vllm-project/semantic-router/candle-binding" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache" . "github.com/onsi/ginkgo/v2" . 
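
// Illustrative sketch, not part of this change: the request lifecycle the
// CacheBackend interface above is designed around. serveWithCache and send are
// hypothetical names; the real router wiring lives outside this file.
func serveWithCache(backend CacheBackend, model, query string, requestBody []byte,
	send func([]byte) ([]byte, error)) ([]byte, error) {
	// Serve from the cache when a semantically similar request has already completed.
	if cached, hit, err := backend.FindSimilar(model, query); err == nil && hit {
		return cached, nil
	}
	// Record the pending request, forward it upstream, then attach the response.
	// Cache failures are deliberately non-fatal in this sketch; hit/miss counters
	// stay observable through GetStats().
	_, _ = backend.AddPendingRequest(model, query, requestBody)
	response, err := send(requestBody)
	if err != nil {
		return nil, err
	}
	_ = backend.UpdateWithResponse(query, response)
	return response, nil
}
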
"github.com/onsi/gomega" - - candle "github.com/vllm-project/semantic-router/candle-binding" - "github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache" ) func TestCache(t *testing.T) { @@ -20,680 +19,622 @@ func TestCache(t *testing.T) { } var _ = BeforeSuite(func() { - err := candle.InitModel("", true) + // Initialize BERT model once for all cache tests (Linux only) + err := candle_binding.InitModel("sentence-transformers/all-MiniLM-L6-v2", true) Expect(err).NotTo(HaveOccurred()) }) var _ = Describe("Cache Package", func() { var ( - semanticCache *cache.SemanticCache - defaultOptions cache.SemanticCacheOptions + tempDir string ) BeforeEach(func() { - defaultOptions = cache.SemanticCacheOptions{ - SimilarityThreshold: 0.8, - MaxEntries: 100, - TTLSeconds: 3600, - Enabled: true, - } - semanticCache = cache.NewSemanticCache(defaultOptions) + var err error + tempDir, err = os.MkdirTemp("", "cache_test") + Expect(err).NotTo(HaveOccurred()) }) - Describe("NewSemanticCache", func() { - It("should create a cache with correct options", func() { - options := cache.SemanticCacheOptions{ - SimilarityThreshold: 0.9, - MaxEntries: 50, - TTLSeconds: 1800, - Enabled: true, - } - c := cache.NewSemanticCache(options) - Expect(c).NotTo(BeNil()) - Expect(c.IsEnabled()).To(BeTrue()) - }) - - It("should create a disabled cache when specified", func() { - options := cache.SemanticCacheOptions{ - Enabled: false, - } - c := cache.NewSemanticCache(options) - Expect(c.IsEnabled()).To(BeFalse()) - }) + AfterEach(func() { + os.RemoveAll(tempDir) }) - Describe("IsEnabled", func() { - It("should return the correct enabled status", func() { - enabledCache := cache.NewSemanticCache(cache.SemanticCacheOptions{Enabled: true}) - Expect(enabledCache.IsEnabled()).To(BeTrue()) + Describe("Cache Factory", func() { + Describe("NewCacheBackend", func() { + Context("with memory backend", func() { + It("should create in-memory cache backend successfully", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: true, + SimilarityThreshold: 0.8, + MaxEntries: 1000, + TTLSeconds: 3600, + } - disabledCache := cache.NewSemanticCache(cache.SemanticCacheOptions{Enabled: false}) - Expect(disabledCache.IsEnabled()).To(BeFalse()) - }) - }) + backend, err := cache.NewCacheBackend(config) + Expect(err).NotTo(HaveOccurred()) + Expect(backend).NotTo(BeNil()) + Expect(backend.IsEnabled()).To(BeTrue()) + }) - Describe("AddEntry", func() { - Context("when cache is enabled", func() { - It("should add a complete entry successfully", func() { - model := "model-a" - query := "What is the capital of France?" 
- requestBody := []byte(`{"model": "model-a", "messages": [{"role": "user", "content": "What is the capital of France?"}]}`) - responseBody := []byte(`{"choices": [{"message": {"content": "Paris"}}]}`) + It("should create disabled cache when enabled is false", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: false, + SimilarityThreshold: 0.8, + MaxEntries: 1000, + TTLSeconds: 3600, + } - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - Expect(err).NotTo(HaveOccurred()) - }) + backend, err := cache.NewCacheBackend(config) + Expect(err).NotTo(HaveOccurred()) + Expect(backend).NotTo(BeNil()) + Expect(backend.IsEnabled()).To(BeFalse()) + }) - It("should handle empty query gracefully", func() { - model := "model-a" - query := "" - requestBody := []byte(`{"model": "model-a"}`) - responseBody := []byte(`{"choices": []}`) + It("should default to memory backend when backend_type is empty", func() { + config := cache.CacheConfig{ + BackendType: "", // Empty should default to memory + Enabled: true, + SimilarityThreshold: 0.8, + MaxEntries: 500, + TTLSeconds: 1800, + } - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - // Should not error, but may not generate embedding for empty query - // The actual behavior depends on the candle_binding implementation - Expect(err).To(Or(BeNil(), HaveOccurred())) + backend, err := cache.NewCacheBackend(config) + Expect(err).NotTo(HaveOccurred()) + Expect(backend).NotTo(BeNil()) + Expect(backend.IsEnabled()).To(BeTrue()) + }) }) - Context("with max entries limit", func() { + Context("with Milvus backend", func() { + var milvusConfigPath string + BeforeEach(func() { - options := cache.SemanticCacheOptions{ - SimilarityThreshold: 0.8, - MaxEntries: 3, - TTLSeconds: 0, // No TTL for this test - Enabled: true, + // Skip Milvus tests if environment variable is set + if os.Getenv("SKIP_MILVUS_TESTS") == "true" { + Skip("Milvus tests skipped due to SKIP_MILVUS_TESTS=true") } - semanticCache = cache.NewSemanticCache(options) - }) - It("should enforce max entries limit by removing oldest entries", func() { - // Add entries beyond the limit - for i := 0; i < 5; i++ { - query := fmt.Sprintf("Query %d", i) - model := "model-a" - requestBody := []byte(fmt.Sprintf(`{"query": "%s"}`, query)) - responseBody := []byte(fmt.Sprintf(`{"response": "Response %d"}`, i)) - - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - Expect(err).To(Or(BeNil(), HaveOccurred())) // Embedding generation might fail in test + // Create a test Milvus configuration file + milvusConfigPath = filepath.Join(tempDir, "milvus.yaml") + milvusConfig := ` +connection: + host: "localhost" + port: 19530 + database: "test_cache" + timeout: 30 + +collection: + name: "test_semantic_cache" + description: "Test semantic cache collection" + vector_field: + name: "embedding" + dimension: 512 + metric_type: "IP" + index: + type: "HNSW" + params: + M: 16 + efConstruction: 64 + +search: + params: + ef: 64 + topk: 10 + consistency_level: "Session" + +development: + auto_create_collection: true + verbose_errors: true +` + err := os.WriteFile(milvusConfigPath, []byte(milvusConfig), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) - // Small delay to ensure different timestamps - time.Sleep(time.Millisecond) + It("should return error when backend_config_path is missing", func() { + config := cache.CacheConfig{ + BackendType: cache.MilvusCacheType, + Enabled: true, + SimilarityThreshold: 0.8, + TTLSeconds: 3600, + // 
BackendConfigPath is missing } - // The cache should not exceed max entries - // We can't directly access the entries count, but we can test the behavior - // by checking that older entries are removed + backend, err := cache.NewCacheBackend(config) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("backend_config_path is required")) + Expect(backend).To(BeNil()) }) - }) - }) - Context("when cache is disabled", func() { - BeforeEach(func() { - options := cache.SemanticCacheOptions{Enabled: false} - semanticCache = cache.NewSemanticCache(options) - }) + It("should return error when backend_config_path file doesn't exist", func() { + config := cache.CacheConfig{ + BackendType: cache.MilvusCacheType, + Enabled: true, + SimilarityThreshold: 0.8, + TTLSeconds: 3600, + BackendConfigPath: "/nonexistent/milvus.yaml", + } - It("should return immediately without error", func() { - model := "model-a" - query := "Test query" - requestBody := []byte(`{"test": "data"}`) - responseBody := []byte(`{"result": "success"}`) + backend, err := cache.NewCacheBackend(config) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("config file not found")) + Expect(backend).To(BeNil()) + }) - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - Expect(err).NotTo(HaveOccurred()) - }) - }) - }) + It("should create Milvus cache backend successfully with valid config", func() { + config := cache.CacheConfig{ + BackendType: cache.MilvusCacheType, + Enabled: true, + SimilarityThreshold: 0.85, + TTLSeconds: 7200, + BackendConfigPath: milvusConfigPath, + } - Describe("AddPendingRequest", func() { - Context("when cache is enabled", func() { - It("should add a pending request and return the query", func() { - model := "model-a" - query := "What is machine learning?" 
- requestBody := []byte(`{"model": "model-a", "messages": [{"role": "user", "content": "What is machine learning?"}]}`) - - returnedQuery, err := semanticCache.AddPendingRequest(model, query, requestBody) - Expect(err).To(Or(BeNil(), HaveOccurred())) // Embedding generation might fail - if err == nil { - Expect(returnedQuery).To(Equal(query)) - } - }) + backend, err := cache.NewCacheBackend(config) - It("should handle empty query", func() { - model := "model-a" - query := "" - requestBody := []byte(`{"model": "model-a"}`) + // Skip test if Milvus is not reachable + if err != nil { + if strings.Contains(err.Error(), "failed to create Milvus client") || + strings.Contains(err.Error(), "connection") || + strings.Contains(err.Error(), "dial") { + Skip("Milvus server not available: " + err.Error()) + } + // For other errors, fail the test + Expect(err).NotTo(HaveOccurred()) + } else { + // If Milvus is available, creation should succeed + Expect(backend).NotTo(BeNil()) + Expect(backend.IsEnabled()).To(BeTrue()) + } + }) - returnedQuery, err := semanticCache.AddPendingRequest(model, query, requestBody) - // Should handle empty query gracefully - Expect(err).To(Or(BeNil(), HaveOccurred())) - if err == nil { - Expect(returnedQuery).To(Equal(query)) - } - }) - }) + It("should handle disabled Milvus cache", func() { + config := cache.CacheConfig{ + BackendType: cache.MilvusCacheType, + Enabled: false, + SimilarityThreshold: 0.8, + TTLSeconds: 3600, + BackendConfigPath: milvusConfigPath, + } - Context("when cache is disabled", func() { - BeforeEach(func() { - options := cache.SemanticCacheOptions{Enabled: false} - semanticCache = cache.NewSemanticCache(options) + backend, err := cache.NewCacheBackend(config) + Expect(err).NotTo(HaveOccurred()) + Expect(backend).NotTo(BeNil()) + Expect(backend.IsEnabled()).To(BeFalse()) + }) }) - It("should return the query without processing", func() { - model := "model-a" - query := "Test query" - requestBody := []byte(`{"test": "data"}`) + Context("with unsupported backend type", func() { + It("should return error for unsupported backend type", func() { + config := cache.CacheConfig{ + BackendType: "redis", // Unsupported + Enabled: true, + SimilarityThreshold: 0.8, + TTLSeconds: 3600, + } - returnedQuery, err := semanticCache.AddPendingRequest(model, query, requestBody) - Expect(err).NotTo(HaveOccurred()) - Expect(returnedQuery).To(Equal(query)) + backend, err := cache.NewCacheBackend(config) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("unsupported cache backend type")) + Expect(backend).To(BeNil()) + }) }) }) - }) - Describe("UpdateWithResponse", func() { - Context("when cache is enabled", func() { - It("should update a pending request with response", func() { - model := "model-a" - query := "Test query for update" - requestBody := []byte(`{"model": "model-a"}`) - responseBody := []byte(`{"response": "test response"}`) - - // First add a pending request - _, err := semanticCache.AddPendingRequest(model, query, requestBody) - Expect(err).NotTo(HaveOccurred()) + Describe("ValidateCacheConfig", func() { + It("should validate enabled memory backend configuration", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: true, + SimilarityThreshold: 0.8, + MaxEntries: 1000, + TTLSeconds: 3600, + } - // Then update it with response - err = semanticCache.UpdateWithResponse(query, responseBody) + err := cache.ValidateCacheConfig(config) Expect(err).NotTo(HaveOccurred()) }) - It("should return error for 
non-existent pending request", func() { - query := "Non-existent query" - responseBody := []byte(`{"response": "test"}`) - - err := semanticCache.UpdateWithResponse(query, responseBody) - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("no pending request found")) - }) - }) + It("should validate disabled cache configuration", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: false, + SimilarityThreshold: 2.0, // Invalid, but should be ignored for disabled cache + MaxEntries: -1, // Invalid, but should be ignored for disabled cache + } - Context("when cache is disabled", func() { - BeforeEach(func() { - options := cache.SemanticCacheOptions{Enabled: false} - semanticCache = cache.NewSemanticCache(options) + err := cache.ValidateCacheConfig(config) + Expect(err).NotTo(HaveOccurred()) // Disabled cache should skip validation }) - It("should return immediately without error", func() { - query := "Test query" - responseBody := []byte(`{"response": "test"}`) + It("should return error for invalid similarity threshold", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: true, + SimilarityThreshold: 1.5, // Invalid: > 1.0 + MaxEntries: 1000, + TTLSeconds: 3600, + } - err := semanticCache.UpdateWithResponse(query, responseBody) - Expect(err).NotTo(HaveOccurred()) + err := cache.ValidateCacheConfig(config) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("similarity_threshold must be between 0.0 and 1.0")) }) - }) - }) - Describe("FindSimilar", func() { - Context("when cache is enabled", func() { - It("should return cache miss for empty cache", func() { - model := "model-a" - query := "What is AI?" + It("should return error for negative similarity threshold", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: true, + SimilarityThreshold: -0.1, // Invalid: < 0.0 + MaxEntries: 1000, + TTLSeconds: 3600, + } - response, found, err := semanticCache.FindSimilar(model, query) - Expect(err).NotTo(HaveOccurred()) - Expect(found).To(BeFalse()) - Expect(response).To(BeNil()) + err := cache.ValidateCacheConfig(config) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("similarity_threshold must be between 0.0 and 1.0")) }) - It("should handle empty query gracefully", func() { - model := "model-a" - query := "" - - response, found, err := semanticCache.FindSimilar(model, query) - // Should handle empty query - Expect(err).To(Or(BeNil(), HaveOccurred())) - if err == nil { - Expect(found).To(BeFalse()) - Expect(response).To(BeNil()) + It("should return error for negative TTL", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: true, + SimilarityThreshold: 0.8, + MaxEntries: 1000, + TTLSeconds: -1, // Invalid: negative TTL } - }) - Context("with entries in cache", func() { - BeforeEach(func() { - // Add some test entries if possible - model := "model-a" - query := "What is the weather?" 
- requestBody := []byte(`{"model": "model-a"}`) - responseBody := []byte(`{"weather": "sunny"}`) - - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - if err != nil { - Skip("Skipping test due to candle_binding dependency") - } - }) - - It("should find similar entries based on model matching", func() { - model := "model-a" - query := "Weather information" - - _, _, err := semanticCache.FindSimilar(model, query) - Expect(err).NotTo(HaveOccurred()) - // Result depends on embedding similarity and threshold - }) - - It("should not find entries for different models", func() { - model := "model-b" // Different model - query := "What is the weather?" - - response, found, err := semanticCache.FindSimilar(model, query) - Expect(err).NotTo(HaveOccurred()) - Expect(found).To(BeFalse()) - Expect(response).To(BeNil()) - }) - }) - }) - - Context("when cache is disabled", func() { - BeforeEach(func() { - options := cache.SemanticCacheOptions{Enabled: false} - semanticCache = cache.NewSemanticCache(options) + err := cache.ValidateCacheConfig(config) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("ttl_seconds cannot be negative")) }) - It("should return cache miss immediately", func() { - model := "model-a" - query := "Any query" + It("should return error for negative max entries in memory backend", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: true, + SimilarityThreshold: 0.8, + MaxEntries: -1, // Invalid: negative max entries + TTLSeconds: 3600, + } - response, found, err := semanticCache.FindSimilar(model, query) - Expect(err).NotTo(HaveOccurred()) - Expect(found).To(BeFalse()) - Expect(response).To(BeNil()) + err := cache.ValidateCacheConfig(config) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("max_entries cannot be negative")) }) - }) - }) - Describe("TTL Functionality", func() { - Context("with TTL enabled", func() { - BeforeEach(func() { - options := cache.SemanticCacheOptions{ - SimilarityThreshold: 0.8, - MaxEntries: 100, - TTLSeconds: 1, // 1 second TTL for testing + It("should return error for Milvus backend without config path", func() { + config := cache.CacheConfig{ + BackendType: cache.MilvusCacheType, Enabled: true, + SimilarityThreshold: 0.8, + TTLSeconds: 3600, + // BackendConfigPath is missing } - semanticCache = cache.NewSemanticCache(options) - }) - It("should expire entries after TTL", func() { - model := "model-a" - query := "TTL test query" - requestBody := []byte(`{"model": "model-a"}`) - responseBody := []byte(`{"response": "test"}`) + err := cache.ValidateCacheConfig(config) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("backend_config_path is required for Milvus")) + }) - // Add entry - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - if err != nil { - Skip("Skipping test due to candle_binding dependency") + It("should validate edge case values", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: true, + SimilarityThreshold: 0.0, // Valid: minimum threshold + MaxEntries: 0, // Valid: unlimited entries + TTLSeconds: 0, // Valid: no expiration } - // Wait for TTL to expire - time.Sleep(2 * time.Second) - - // Try to find the entry - should trigger cleanup and not find expired entry - _, _, err = semanticCache.FindSimilar(model, query) + err := cache.ValidateCacheConfig(config) Expect(err).NotTo(HaveOccurred()) - // Entry should be expired and not found, or found but 
will be cleaned up }) - }) - Context("without TTL", func() { - BeforeEach(func() { - options := cache.SemanticCacheOptions{ - SimilarityThreshold: 0.8, - MaxEntries: 100, - TTLSeconds: 0, // No TTL + It("should validate maximum threshold value", func() { + config := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, Enabled: true, + SimilarityThreshold: 1.0, // Valid: maximum threshold + MaxEntries: 10000, + TTLSeconds: 86400, } - semanticCache = cache.NewSemanticCache(options) - }) - It("should not expire entries", func() { - model := "model-a" - query := "No TTL test query" - requestBody := []byte(`{"model": "model-a"}`) - responseBody := []byte(`{"response": "test"}`) + err := cache.ValidateCacheConfig(config) + Expect(err).NotTo(HaveOccurred()) + }) + }) - // Add entry - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - if err != nil { - Skip("Skipping test due to candle_binding dependency") - } + Describe("GetDefaultCacheConfig", func() { + It("should return valid default configuration", func() { + config := cache.GetDefaultCacheConfig() - // Wait some time - time.Sleep(100 * time.Millisecond) + Expect(config.BackendType).To(Equal(cache.InMemoryCacheType)) + Expect(config.Enabled).To(BeTrue()) + Expect(config.SimilarityThreshold).To(Equal(float32(0.8))) + Expect(config.MaxEntries).To(Equal(1000)) + Expect(config.TTLSeconds).To(Equal(3600)) + Expect(config.BackendConfigPath).To(BeEmpty()) - // Entry should still be searchable - _, _, err = semanticCache.FindSimilar(model, query) + // Default config should pass validation + err := cache.ValidateCacheConfig(config) Expect(err).NotTo(HaveOccurred()) - // Without TTL, entry should persist (subject to similarity matching) }) }) - }) - Describe("Concurrent Access", func() { - It("should handle concurrent AddEntry calls safely", func() { - const numGoroutines = 10 - var wg sync.WaitGroup - errors := make([]error, numGoroutines) - - wg.Add(numGoroutines) - for i := 0; i < numGoroutines; i++ { - go func(index int) { - defer wg.Done() - model := "model-a" - query := fmt.Sprintf("Concurrent query %d", index) - requestBody := []byte(fmt.Sprintf(`{"index": %d}`, index)) - responseBody := []byte(fmt.Sprintf(`{"result": %d}`, index)) - - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - errors[index] = err - }(i) - } + Describe("GetAvailableCacheBackends", func() { + It("should return information about available backends", func() { + backends := cache.GetAvailableCacheBackends() + + Expect(backends).To(HaveLen(2)) // Memory and Milvus + + // Check memory backend info + memoryBackend := backends[0] + Expect(memoryBackend.Type).To(Equal(cache.InMemoryCacheType)) + Expect(memoryBackend.Name).To(Equal("In-Memory Cache")) + Expect(memoryBackend.Description).To(ContainSubstring("in-memory semantic cache")) + Expect(memoryBackend.Features).To(ContainElement("Fast access")) + Expect(memoryBackend.Features).To(ContainElement("No external dependencies")) + + // Check Milvus backend info + milvusBackend := backends[1] + Expect(milvusBackend.Type).To(Equal(cache.MilvusCacheType)) + Expect(milvusBackend.Name).To(Equal("Milvus Vector Database")) + Expect(milvusBackend.Description).To(ContainSubstring("Milvus vector database")) + Expect(milvusBackend.Features).To(ContainElement("Highly scalable")) + Expect(milvusBackend.Features).To(ContainElement("Persistent storage")) + }) + }) + }) - wg.Wait() + Describe("InMemoryCache", func() { + var ( + inMemoryCache cache.CacheBackend + ) - // Check that no race 
conditions occurred - // Some errors might occur due to candle_binding, but no panics should happen - for i := 0; i < numGoroutines; i++ { - // We don't assert on specific errors since candle_binding might not be available - // The important thing is that no race conditions or panics occurred + BeforeEach(func() { + options := cache.InMemoryCacheOptions{ + Enabled: true, + SimilarityThreshold: 0.8, + MaxEntries: 100, + TTLSeconds: 300, } + inMemoryCache = cache.NewInMemoryCache(options) }) - It("should handle concurrent FindSimilar calls safely", func() { - const numGoroutines = 10 - var wg sync.WaitGroup - results := make([]bool, numGoroutines) - errors := make([]error, numGoroutines) - - wg.Add(numGoroutines) - for i := 0; i < numGoroutines; i++ { - go func(index int) { - defer wg.Done() - model := "model-a" - query := fmt.Sprintf("Search query %d", index) - - _, found, err := semanticCache.FindSimilar(model, query) - results[index] = found - errors[index] = err - }(i) + AfterEach(func() { + if inMemoryCache != nil { + inMemoryCache.Close() } + // BERT model is initialized once per process, no need to reset + }) + + It("should implement CacheBackend interface", func() { + // Check that the concrete type implements the interface + var _ cache.CacheBackend = inMemoryCache + Expect(inMemoryCache).NotTo(BeNil()) + }) - wg.Wait() + It("should report enabled status correctly", func() { + Expect(inMemoryCache.IsEnabled()).To(BeTrue()) - // Check that no race conditions occurred - for i := 0; i < numGoroutines; i++ { - // We don't assert on specific results since cache is likely empty - // The important thing is that no race conditions or panics occurred + // Create disabled cache + disabledOptions := cache.InMemoryCacheOptions{ + Enabled: false, + SimilarityThreshold: 0.8, + MaxEntries: 100, + TTLSeconds: 300, } + disabledCache := cache.NewInMemoryCache(disabledOptions) + defer disabledCache.Close() + + Expect(disabledCache.IsEnabled()).To(BeFalse()) }) - It("should handle mixed concurrent operations safely", func() { - const numGoroutines = 20 - var wg sync.WaitGroup - - wg.Add(numGoroutines) - for i := 0; i < numGoroutines; i++ { - go func(index int) { - defer wg.Done() - model := "model-a" - query := fmt.Sprintf("Mixed operation query %d", index) - - if index%2 == 0 { - // Add entry - requestBody := []byte(fmt.Sprintf(`{"index": %d}`, index)) - responseBody := []byte(fmt.Sprintf(`{"result": %d}`, index)) - semanticCache.AddEntry(model, query, requestBody, responseBody) - } else { - // Search for similar - semanticCache.FindSimilar(model, query) - } - }(i) - } + It("should handle basic cache operations without embeddings", func() { + // Test GetStats on empty cache + stats := inMemoryCache.GetStats() + Expect(stats.TotalEntries).To(Equal(0)) + Expect(stats.HitCount).To(Equal(int64(0))) + Expect(stats.MissCount).To(Equal(int64(0))) + Expect(stats.HitRatio).To(Equal(0.0)) + }) + + It("should handle AddEntry operation with embeddings", func() { + err := inMemoryCache.AddEntry("test-model", "test query", []byte("request"), []byte("response")) + Expect(err).NotTo(HaveOccurred()) - wg.Wait() - // If we reach here without panic, the concurrent access handling is working + stats := inMemoryCache.GetStats() + Expect(stats.TotalEntries).To(Equal(1)) }) - }) - Describe("ExtractQueryFromOpenAIRequest", func() { - It("should extract model and query from valid OpenAI request", func() { - request := cache.OpenAIRequest{ - Model: "model-a", - Messages: []cache.ChatMessage{ - {Role: "system", Content: "You 
are a helpful assistant."}, - {Role: "user", Content: "What is the capital of France?"}, - {Role: "assistant", Content: "The capital of France is Paris."}, - {Role: "user", Content: "What about Germany?"}, - }, - } + It("should handle FindSimilar operation with embeddings", func() { + // First add an entry + err := inMemoryCache.AddEntry("test-model", "test query", []byte("request"), []byte("response")) + Expect(err).NotTo(HaveOccurred()) - requestBody, err := json.Marshal(request) + // Search for similar query + response, found, err := inMemoryCache.FindSimilar("test-model", "test query") Expect(err).NotTo(HaveOccurred()) + Expect(found).To(BeTrue()) // Should find exact match + Expect(response).To(Equal([]byte("response"))) - model, query, err := cache.ExtractQueryFromOpenAIRequest(requestBody) + // Search for different model (should not match) + response, found, err = inMemoryCache.FindSimilar("different-model", "test query") Expect(err).NotTo(HaveOccurred()) - Expect(model).To(Equal("model-a")) - Expect(query).To(Equal("What about Germany?")) // Should get the last user message + Expect(found).To(BeFalse()) // Should not match different model + Expect(response).To(BeNil()) }) - It("should handle request with only system messages", func() { - request := cache.OpenAIRequest{ - Model: "model-b", - Messages: []cache.ChatMessage{ - {Role: "system", Content: "You are a helpful assistant."}, - }, - } + It("should handle AddPendingRequest and UpdateWithResponse", func() { + query, err := inMemoryCache.AddPendingRequest("test-model", "test query", []byte("request")) + Expect(err).NotTo(HaveOccurred()) + Expect(query).To(Equal("test query")) - requestBody, err := json.Marshal(request) + // Update with response + err = inMemoryCache.UpdateWithResponse("test query", []byte("response")) Expect(err).NotTo(HaveOccurred()) - model, query, err := cache.ExtractQueryFromOpenAIRequest(requestBody) + // Should now be able to find it + response, found, err := inMemoryCache.FindSimilar("test-model", "test query") Expect(err).NotTo(HaveOccurred()) - Expect(model).To(Equal("model-b")) - Expect(query).To(BeEmpty()) // No user messages + Expect(found).To(BeTrue()) + Expect(response).To(Equal([]byte("response"))) }) - It("should handle request with multiple user messages", func() { - request := cache.OpenAIRequest{ - Model: "model-a", - Messages: []cache.ChatMessage{ - {Role: "user", Content: "First user message"}, - {Role: "assistant", Content: "Assistant response"}, - {Role: "user", Content: "Second user message"}, - {Role: "user", Content: "Third user message"}, - }, + It("should respect similarity threshold", func() { + // Add entry with a very high similarity threshold + highThresholdOptions := cache.InMemoryCacheOptions{ + Enabled: true, + SimilarityThreshold: 0.99, // Very high threshold + MaxEntries: 100, + TTLSeconds: 300, } + highThresholdCache := cache.NewInMemoryCache(highThresholdOptions) + defer highThresholdCache.Close() - requestBody, err := json.Marshal(request) + err := highThresholdCache.AddEntry("test-model", "machine learning", []byte("request"), []byte("ml response")) Expect(err).NotTo(HaveOccurred()) - model, query, err := cache.ExtractQueryFromOpenAIRequest(requestBody) + // Exact match should work + response, found, err := highThresholdCache.FindSimilar("test-model", "machine learning") Expect(err).NotTo(HaveOccurred()) - Expect(model).To(Equal("model-a")) - Expect(query).To(Equal("Third user message")) // Should get the last user message + Expect(found).To(BeTrue()) + 
Expect(response).To(Equal([]byte("ml response"))) + + // Different query should not match due to high threshold + response, found, err = highThresholdCache.FindSimilar("test-model", "artificial intelligence") + Expect(err).NotTo(HaveOccurred()) + Expect(found).To(BeFalse()) + Expect(response).To(BeNil()) }) - It("should handle empty messages array", func() { - request := cache.OpenAIRequest{ - Model: "model-a", - Messages: []cache.ChatMessage{}, - } + It("should track hit and miss statistics", func() { + // Add an entry with a specific query + err := inMemoryCache.AddEntry("test-model", "What is machine learning?", []byte("request"), []byte("ML is a subset of AI")) + Expect(err).NotTo(HaveOccurred()) - requestBody, err := json.Marshal(request) + // Search for the exact cached query (should be a hit) + response, found, err := inMemoryCache.FindSimilar("test-model", "What is machine learning?") Expect(err).NotTo(HaveOccurred()) + Expect(found).To(BeTrue()) + Expect(response).To(Equal([]byte("ML is a subset of AI"))) - model, query, err := cache.ExtractQueryFromOpenAIRequest(requestBody) + // Search for a completely unrelated query (should be a miss) + response, found, err = inMemoryCache.FindSimilar("test-model", "How do I cook pasta?") Expect(err).NotTo(HaveOccurred()) - Expect(model).To(Equal("model-a")) - Expect(query).To(BeEmpty()) + Expect(found).To(BeFalse()) + Expect(response).To(BeNil()) + + // Check statistics + stats := inMemoryCache.GetStats() + Expect(stats.HitCount).To(Equal(int64(1))) + Expect(stats.MissCount).To(Equal(int64(1))) + Expect(stats.HitRatio).To(Equal(0.5)) }) - It("should return error for invalid JSON", func() { - invalidJSON := []byte(`{"model": "model-a", "messages": [invalid json}`) - - model, query, err := cache.ExtractQueryFromOpenAIRequest(invalidJSON) + It("should handle error when updating non-existent pending request", func() { + err := inMemoryCache.UpdateWithResponse("non-existent-query", []byte("response")) Expect(err).To(HaveOccurred()) - Expect(model).To(BeEmpty()) - Expect(query).To(BeEmpty()) - Expect(err.Error()).To(ContainSubstring("invalid request body")) + Expect(err.Error()).To(ContainSubstring("no pending request found")) }) - It("should handle missing model field", func() { - request := map[string]interface{}{ - "messages": []cache.ChatMessage{ - {Role: "user", Content: "Test message"}, - }, - } - - requestBody, err := json.Marshal(request) + It("should handle close operation", func() { + err := inMemoryCache.Close() Expect(err).NotTo(HaveOccurred()) - model, query, err := cache.ExtractQueryFromOpenAIRequest(requestBody) - Expect(err).NotTo(HaveOccurred()) - Expect(model).To(BeEmpty()) // Missing model field - Expect(query).To(Equal("Test message")) + // Stats should show zero entries after close + stats := inMemoryCache.GetStats() + Expect(stats.TotalEntries).To(Equal(0)) }) - It("should handle request with empty content", func() { - request := cache.OpenAIRequest{ - Model: "model-a", - Messages: []cache.ChatMessage{ - {Role: "user", Content: ""}, - {Role: "user", Content: "Non-empty message"}, - }, + It("should handle disabled cache operations gracefully", func() { + disabledOptions := cache.InMemoryCacheOptions{ + Enabled: false, + SimilarityThreshold: 0.8, + MaxEntries: 100, + TTLSeconds: 300, } + disabledCache := cache.NewInMemoryCache(disabledOptions) + defer disabledCache.Close() - requestBody, err := json.Marshal(request) + // Disabled cache operations should not error but should be no-ops + // They should NOT try to generate 
embeddings + query, err := disabledCache.AddPendingRequest("model", "query", []byte("request")) Expect(err).NotTo(HaveOccurred()) + Expect(query).To(Equal("query")) - model, query, err := cache.ExtractQueryFromOpenAIRequest(requestBody) + err = disabledCache.UpdateWithResponse("query", []byte("response")) Expect(err).NotTo(HaveOccurred()) - Expect(model).To(Equal("model-a")) - Expect(query).To(Equal("Non-empty message")) // Should get the last non-empty user message - }) - }) - Describe("Edge Cases and Error Conditions", func() { - It("should handle very large request/response bodies", func() { - model := "model-a" - query := "Large data test" - largeData := make([]byte, 1024*1024) // 1MB of data - for i := range largeData { - largeData[i] = byte(i % 256) - } + err = disabledCache.AddEntry("model", "query", []byte("request"), []byte("response")) + Expect(err).NotTo(HaveOccurred()) - err := semanticCache.AddEntry(model, query, largeData, largeData) - // Should handle large data gracefully - Expect(err).To(Or(BeNil(), HaveOccurred())) + response, found, err := disabledCache.FindSimilar("model", "query") + Expect(err).NotTo(HaveOccurred()) + Expect(found).To(BeFalse()) + Expect(response).To(BeNil()) + + // Stats should show zero activity + stats := disabledCache.GetStats() + Expect(stats.TotalEntries).To(Equal(0)) + Expect(stats.HitCount).To(Equal(int64(0))) + Expect(stats.MissCount).To(Equal(int64(0))) }) + }) - It("should handle special characters in queries", func() { - model := "model-a" - query := "Query with special chars: 你好, émoji 🚀, and unicode ∀∃∅" - requestBody := []byte(`{"special": "chars"}`) - responseBody := []byte(`{"response": "special"}`) - - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - Expect(err).To(Or(BeNil(), HaveOccurred())) + Describe("Cache Backend Types", func() { + It("should have correct backend type constants", func() { + Expect(cache.InMemoryCacheType).To(Equal(cache.CacheBackendType("memory"))) + Expect(cache.MilvusCacheType).To(Equal(cache.CacheBackendType("milvus"))) }) + }) - It("should handle very long queries", func() { - model := "model-a" - query := string(make([]byte, 10000)) // Very long query - for i := range query { - query = query[:i] + "a" + Describe("Cache Configuration Types", func() { + It("should support all required configuration fields", func() { + config := cache.CacheConfig{ + BackendType: cache.MilvusCacheType, + Enabled: true, + SimilarityThreshold: 0.9, + MaxEntries: 2000, + TTLSeconds: 7200, + BackendConfigPath: "config/cache/milvus.yaml", } - requestBody := []byte(`{"long": "query"}`) - responseBody := []byte(`{"response": "long"}`) - - err := semanticCache.AddEntry(model, query, requestBody, responseBody) - Expect(err).To(Or(BeNil(), HaveOccurred())) - }) - - It("should handle nil request/response bodies", func() { - model := "model-a" - query := "Nil test" - err := semanticCache.AddEntry(model, query, nil, nil) - Expect(err).To(Or(BeNil(), HaveOccurred())) + // Verify all fields are accessible + Expect(string(config.BackendType)).To(Equal("milvus")) + Expect(config.Enabled).To(BeTrue()) + Expect(config.SimilarityThreshold).To(Equal(float32(0.9))) + Expect(config.MaxEntries).To(Equal(2000)) + Expect(config.TTLSeconds).To(Equal(7200)) + Expect(config.BackendConfigPath).To(Equal("config/cache/milvus.yaml")) }) }) - Describe("Similarity Threshold Edge Cases", func() { - Context("with very low threshold", func() { - BeforeEach(func() { - options := cache.SemanticCacheOptions{ - SimilarityThreshold: 0.0, // Very 
low threshold - MaxEntries: 100, - TTLSeconds: 0, - Enabled: true, - } - semanticCache = cache.NewSemanticCache(options) - }) - - It("should potentially match more entries", func() { - // Add an entry - model := "model-a" - query1 := "What is AI?" - requestBody := []byte(`{"model": "model-a"}`) - responseBody := []byte(`{"response": "AI info"}`) - - err := semanticCache.AddEntry(model, query1, requestBody, responseBody) - if err != nil { - Skip("Skipping test due to candle_binding dependency") - } + Describe("Cache Stats", func() { + It("should calculate hit ratio correctly", func() { + stats := cache.CacheStats{ + TotalEntries: 100, + HitCount: 75, + MissCount: 25, + HitRatio: 0.75, + } - // Search with different query - query2 := "Completely different query" - _, _, err = semanticCache.FindSimilar(model, query2) - Expect(err).NotTo(HaveOccurred()) - // With very low threshold, might find matches - }) + Expect(stats.HitRatio).To(Equal(0.75)) + Expect(stats.HitCount + stats.MissCount).To(Equal(int64(100))) }) - Context("with very high threshold", func() { - BeforeEach(func() { - options := cache.SemanticCacheOptions{ - SimilarityThreshold: 0.999, // Very high threshold - MaxEntries: 100, - TTLSeconds: 0, - Enabled: true, - } - semanticCache = cache.NewSemanticCache(options) - }) - - It("should rarely match entries", func() { - // Add an entry - model := "model-a" - query1 := "What is AI?" - requestBody := []byte(`{"model": "model-a"}`) - responseBody := []byte(`{"response": "AI info"}`) - - err := semanticCache.AddEntry(model, query1, requestBody, responseBody) - if err != nil { - Skip("Skipping test due to candle_binding dependency") - } + It("should handle zero values correctly", func() { + stats := cache.CacheStats{ + TotalEntries: 0, + HitCount: 0, + MissCount: 0, + HitRatio: 0.0, + } - // Search with slightly different query - query2 := "What is artificial intelligence?" 
- _, found, err := semanticCache.FindSimilar(model, query2) - Expect(err).NotTo(HaveOccurred()) - // With very high threshold, should rarely find matches - Expect(found).To(BeFalse()) - }) + Expect(stats.HitRatio).To(Equal(0.0)) + Expect(stats.TotalEntries).To(Equal(0)) }) }) }) diff --git a/src/semantic-router/pkg/cache/inmemory_cache.go b/src/semantic-router/pkg/cache/inmemory_cache.go new file mode 100644 index 00000000..9928683d --- /dev/null +++ b/src/semantic-router/pkg/cache/inmemory_cache.go @@ -0,0 +1,404 @@ +//go:build !windows && cgo +// +build !windows,cgo + +package cache + +import ( + "fmt" + "sort" + "sync" + "sync/atomic" + "time" + + candle_binding "github.com/vllm-project/semantic-router/candle-binding" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" +) + +// InMemoryCache provides a high-performance semantic cache using BERT embeddings in memory +type InMemoryCache struct { + entries []CacheEntry + mu sync.RWMutex + similarityThreshold float32 + maxEntries int + ttlSeconds int + enabled bool + hitCount int64 + missCount int64 + lastCleanupTime *time.Time +} + +// InMemoryCacheOptions contains configuration parameters for the in-memory cache +type InMemoryCacheOptions struct { + SimilarityThreshold float32 + MaxEntries int + TTLSeconds int + Enabled bool +} + +// NewInMemoryCache initializes a new in-memory semantic cache instance +func NewInMemoryCache(options InMemoryCacheOptions) *InMemoryCache { + observability.Debugf("Initializing in-memory cache: enabled=%t, maxEntries=%d, ttlSeconds=%d, threshold=%.3f", + options.Enabled, options.MaxEntries, options.TTLSeconds, options.SimilarityThreshold) + return &InMemoryCache{ + entries: []CacheEntry{}, + similarityThreshold: options.SimilarityThreshold, + maxEntries: options.MaxEntries, + ttlSeconds: options.TTLSeconds, + enabled: options.Enabled, + } +} + +// IsEnabled returns the current cache activation status +func (c *InMemoryCache) IsEnabled() bool { + return c.enabled +} + +// AddPendingRequest stores a request that is awaiting its response +func (c *InMemoryCache) AddPendingRequest(model string, query string, requestBody []byte) (string, error) { + start := time.Now() + + if !c.enabled { + return query, nil + } + + // Generate semantic embedding for the query + embedding, err := candle_binding.GetEmbedding(query, 0) // Auto-detect dimension + if err != nil { + metrics.RecordCacheOperation("memory", "add_pending", "error", time.Since(start).Seconds()) + return "", fmt.Errorf("failed to generate embedding: %w", err) + } + + c.mu.Lock() + defer c.mu.Unlock() + + // Remove expired entries to maintain cache hygiene + c.cleanupExpiredEntries() + + // Create cache entry for the pending request + entry := CacheEntry{ + RequestBody: requestBody, + Model: model, + Query: query, + Embedding: embedding, + Timestamp: time.Now(), + } + + c.entries = append(c.entries, entry) + observability.Debugf("InMemoryCache.AddPendingRequest: added pending entry (total entries: %d, embedding_dim: %d)", + len(c.entries), len(embedding)) + + // Apply entry limit to prevent unbounded memory growth + if c.maxEntries > 0 && len(c.entries) > c.maxEntries { + // Sort entries by timestamp to identify oldest + sort.Slice(c.entries, func(i, j int) bool { + return c.entries[i].Timestamp.Before(c.entries[j].Timestamp) + }) + // Keep only the most recent entries + removedCount := len(c.entries) - c.maxEntries + c.entries = 
c.entries[len(c.entries)-c.maxEntries:] + observability.Debugf("InMemoryCache: size limit exceeded, removed %d oldest entries (limit: %d)", + removedCount, c.maxEntries) + observability.LogEvent("cache_trimmed", map[string]interface{}{ + "backend": "memory", + "removed_count": removedCount, + "max_entries": c.maxEntries, + }) + } + + // Record metrics + metrics.RecordCacheOperation("memory", "add_pending", "success", time.Since(start).Seconds()) + metrics.UpdateCacheEntries("memory", len(c.entries)) + + return query, nil +} + +// UpdateWithResponse completes a pending request by adding the response +func (c *InMemoryCache) UpdateWithResponse(query string, responseBody []byte) error { + start := time.Now() + + if !c.enabled { + return nil + } + + c.mu.Lock() + defer c.mu.Unlock() + + // Clean up expired entries during the update + c.cleanupExpiredEntries() + + // Locate the pending request and complete it + for i, entry := range c.entries { + if entry.Query == query && entry.ResponseBody == nil { + // Complete the cache entry with the response + c.entries[i].ResponseBody = responseBody + c.entries[i].Timestamp = time.Now() + observability.Debugf("InMemoryCache.UpdateWithResponse: updated entry with response (response_size: %d bytes)", + len(responseBody)) + + // Record successful completion + metrics.RecordCacheOperation("memory", "update_response", "success", time.Since(start).Seconds()) + return nil + } + } + + // No matching pending request found + metrics.RecordCacheOperation("memory", "update_response", "error", time.Since(start).Seconds()) + return fmt.Errorf("no pending request found for query: %s", query) +} + +// AddEntry stores a complete request-response pair in the cache +func (c *InMemoryCache) AddEntry(model string, query string, requestBody, responseBody []byte) error { + start := time.Now() + + if !c.enabled { + return nil + } + + // Generate semantic embedding for the query + embedding, err := candle_binding.GetEmbedding(query, 0) // Auto-detect dimension + if err != nil { + metrics.RecordCacheOperation("memory", "add_entry", "error", time.Since(start).Seconds()) + return fmt.Errorf("failed to generate embedding: %w", err) + } + + entry := CacheEntry{ + RequestBody: requestBody, + ResponseBody: responseBody, + Model: model, + Query: query, + Embedding: embedding, + Timestamp: time.Now(), + } + + c.mu.Lock() + defer c.mu.Unlock() + + // Clean up expired entries before adding new one + c.cleanupExpiredEntries() + + c.entries = append(c.entries, entry) + observability.Debugf("InMemoryCache.AddEntry: added complete entry (total entries: %d, request_size: %d, response_size: %d)", + len(c.entries), len(requestBody), len(responseBody)) + observability.LogEvent("cache_entry_added", map[string]interface{}{ + "backend": "memory", + "query": query, + "model": model, + }) + + // Apply entry limit if configured + if c.maxEntries > 0 && len(c.entries) > c.maxEntries { + // Sort by timestamp to identify oldest entries + sort.Slice(c.entries, func(i, j int) bool { + return c.entries[i].Timestamp.Before(c.entries[j].Timestamp) + }) + // Keep only the most recent entries + c.entries = c.entries[len(c.entries)-c.maxEntries:] + } + + // Record success metrics + metrics.RecordCacheOperation("memory", "add_entry", "success", time.Since(start).Seconds()) + metrics.UpdateCacheEntries("memory", len(c.entries)) + + return nil +} + +// FindSimilar searches for semantically similar cached requests +func (c *InMemoryCache) FindSimilar(model string, query string) ([]byte, bool, error) { + start := 
time.Now() + + if !c.enabled { + observability.Debugf("InMemoryCache.FindSimilar: cache disabled") + return nil, false, nil + } + queryPreview := query + if len(query) > 50 { + queryPreview = query[:50] + "..." + } + observability.Debugf("InMemoryCache.FindSimilar: searching for model='%s', query='%s' (len=%d chars)", + model, queryPreview, len(query)) + + // Generate semantic embedding for similarity comparison + queryEmbedding, err := candle_binding.GetEmbedding(query, 0) // Auto-detect dimension + if err != nil { + metrics.RecordCacheOperation("memory", "find_similar", "error", time.Since(start).Seconds()) + return nil, false, fmt.Errorf("failed to generate embedding: %w", err) + } + + c.mu.RLock() + defer c.mu.RUnlock() + + // Check for expired entries during search + c.cleanupExpiredEntriesReadOnly() + + type SimilarityResult struct { + Entry CacheEntry + Similarity float32 + } + + // Compare with completed entries for the same model + results := make([]SimilarityResult, 0, len(c.entries)) + for _, entry := range c.entries { + if entry.ResponseBody == nil { + continue // Skip incomplete entries + } + + // Only consider entries for the same model + if entry.Model != model { + continue + } + + // Compute semantic similarity using dot product + var dotProduct float32 + for i := 0; i < len(queryEmbedding) && i < len(entry.Embedding); i++ { + dotProduct += queryEmbedding[i] * entry.Embedding[i] + } + + results = append(results, SimilarityResult{ + Entry: entry, + Similarity: dotProduct, + }) + } + + // Handle case where no suitable entries exist + if len(results) == 0 { + atomic.AddInt64(&c.missCount, 1) + observability.Debugf("InMemoryCache.FindSimilar: no entries found with responses (total entries: %d)", len(c.entries)) + metrics.RecordCacheOperation("memory", "find_similar", "miss", time.Since(start).Seconds()) + metrics.RecordCacheMiss() + return nil, false, nil + } + + // Sort results by similarity score (highest first) + sort.Slice(results, func(i, j int) bool { + return results[i].Similarity > results[j].Similarity + }) + + // Check if the best match meets the similarity threshold + if results[0].Similarity >= c.similarityThreshold { + atomic.AddInt64(&c.hitCount, 1) + observability.Debugf("InMemoryCache.FindSimilar: CACHE HIT - similarity=%.4f >= threshold=%.4f, response_size=%d bytes", + results[0].Similarity, c.similarityThreshold, len(results[0].Entry.ResponseBody)) + observability.LogEvent("cache_hit", map[string]interface{}{ + "backend": "memory", + "similarity": results[0].Similarity, + "threshold": c.similarityThreshold, + "model": model, + }) + metrics.RecordCacheOperation("memory", "find_similar", "hit", time.Since(start).Seconds()) + metrics.RecordCacheHit() + return results[0].Entry.ResponseBody, true, nil + } + + atomic.AddInt64(&c.missCount, 1) + observability.Debugf("InMemoryCache.FindSimilar: CACHE MISS - best_similarity=%.4f < threshold=%.4f (checked %d entries)", + results[0].Similarity, c.similarityThreshold, len(results)) + observability.LogEvent("cache_miss", map[string]interface{}{ + "backend": "memory", + "best_similarity": results[0].Similarity, + "threshold": c.similarityThreshold, + "model": model, + "entries_checked": len(results), + }) + metrics.RecordCacheOperation("memory", "find_similar", "miss", time.Since(start).Seconds()) + metrics.RecordCacheMiss() + return nil, false, nil +} + +// Close releases all resources held by the cache +func (c *InMemoryCache) Close() error { + c.mu.Lock() + defer c.mu.Unlock() + + // Clear all entries to free memory + 
c.entries = nil + return nil +} + +// GetStats provides current cache performance metrics +func (c *InMemoryCache) GetStats() CacheStats { + c.mu.RLock() + defer c.mu.RUnlock() + + hits := atomic.LoadInt64(&c.hitCount) + misses := atomic.LoadInt64(&c.missCount) + total := hits + misses + + var hitRatio float64 + if total > 0 { + hitRatio = float64(hits) / float64(total) + } + + stats := CacheStats{ + TotalEntries: len(c.entries), + HitCount: hits, + MissCount: misses, + HitRatio: hitRatio, + } + + if c.lastCleanupTime != nil { + stats.LastCleanupTime = c.lastCleanupTime + } + + return stats +} + +// cleanupExpiredEntries removes entries that have exceeded their TTL +// Caller must hold a write lock +func (c *InMemoryCache) cleanupExpiredEntries() { + if c.ttlSeconds <= 0 { + return + } + + now := time.Now() + validEntries := make([]CacheEntry, 0, len(c.entries)) + + for _, entry := range c.entries { + // Retain entries that are still within their TTL + if now.Sub(entry.Timestamp).Seconds() < float64(c.ttlSeconds) { + validEntries = append(validEntries, entry) + } + } + + if len(validEntries) < len(c.entries) { + expiredCount := len(c.entries) - len(validEntries) + observability.Debugf("InMemoryCache: TTL cleanup removed %d expired entries (remaining: %d)", + expiredCount, len(validEntries)) + observability.LogEvent("cache_cleanup", map[string]interface{}{ + "backend": "memory", + "expired_count": expiredCount, + "remaining_count": len(validEntries), + "ttl_seconds": c.ttlSeconds, + }) + c.entries = validEntries + cleanupTime := time.Now() + c.lastCleanupTime = &cleanupTime + } +} + +// cleanupExpiredEntriesReadOnly identifies expired entries without modifying the cache +// Used during read operations with only a read lock held +func (c *InMemoryCache) cleanupExpiredEntriesReadOnly() { + if c.ttlSeconds <= 0 { + return + } + + now := time.Now() + expiredCount := 0 + + for _, entry := range c.entries { + if now.Sub(entry.Timestamp).Seconds() >= float64(c.ttlSeconds) { + expiredCount++ + } + } + + if expiredCount > 0 { + observability.Debugf("InMemoryCache: found %d expired entries during read (TTL: %ds)", + expiredCount, c.ttlSeconds) + observability.LogEvent("cache_expired_entries_found", map[string]interface{}{ + "backend": "memory", + "expired_count": expiredCount, + "ttl_seconds": c.ttlSeconds, + }) + } +} diff --git a/src/semantic-router/pkg/cache/milvus_cache.go b/src/semantic-router/pkg/cache/milvus_cache.go new file mode 100644 index 00000000..a4edcde3 --- /dev/null +++ b/src/semantic-router/pkg/cache/milvus_cache.go @@ -0,0 +1,676 @@ +package cache + +import ( + "context" + "crypto/md5" + "fmt" + "os" + "sync" + "sync/atomic" + "time" + + "github.com/milvus-io/milvus-sdk-go/v2/client" + "github.com/milvus-io/milvus-sdk-go/v2/entity" + candle_binding "github.com/vllm-project/semantic-router/candle-binding" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" + "gopkg.in/yaml.v3" +) + +// MilvusConfig defines the complete configuration structure for Milvus cache backend +type MilvusConfig struct { + Connection struct { + Host string `yaml:"host"` + Port int `yaml:"port"` + Database string `yaml:"database"` + Timeout int `yaml:"timeout"` + Auth struct { + Enabled bool `yaml:"enabled"` + Username string `yaml:"username"` + Password string `yaml:"password"` + } `yaml:"auth"` + TLS struct { + Enabled bool `yaml:"enabled"` + CertFile string `yaml:"cert_file"` + KeyFile string 
`yaml:"key_file"` + CAFile string `yaml:"ca_file"` + } `yaml:"tls"` + } `yaml:"connection"` + Collection struct { + Name string `yaml:"name"` + Description string `yaml:"description"` + VectorField struct { + Name string `yaml:"name"` + Dimension int `yaml:"dimension"` + MetricType string `yaml:"metric_type"` + } `yaml:"vector_field"` + Index struct { + Type string `yaml:"type"` + Params struct { + M int `yaml:"M"` + EfConstruction int `yaml:"efConstruction"` + } `yaml:"params"` + } `yaml:"index"` + } `yaml:"collection"` + Search struct { + Params struct { + Ef int `yaml:"ef"` + } `yaml:"params"` + TopK int `yaml:"topk"` + ConsistencyLevel string `yaml:"consistency_level"` + } `yaml:"search"` + Performance struct { + ConnectionPool struct { + MaxConnections int `yaml:"max_connections"` + MaxIdleConnections int `yaml:"max_idle_connections"` + AcquireTimeout int `yaml:"acquire_timeout"` + } `yaml:"connection_pool"` + Batch struct { + InsertBatchSize int `yaml:"insert_batch_size"` + Timeout int `yaml:"timeout"` + } `yaml:"batch"` + } `yaml:"performance"` + DataManagement struct { + TTL struct { + Enabled bool `yaml:"enabled"` + TimestampField string `yaml:"timestamp_field"` + CleanupInterval int `yaml:"cleanup_interval"` + } `yaml:"ttl"` + Compaction struct { + Enabled bool `yaml:"enabled"` + Interval int `yaml:"interval"` + } `yaml:"compaction"` + } `yaml:"data_management"` + Logging struct { + Level string `yaml:"level"` + EnableQueryLog bool `yaml:"enable_query_log"` + EnableMetrics bool `yaml:"enable_metrics"` + } `yaml:"logging"` + Development struct { + DropCollectionOnStartup bool `yaml:"drop_collection_on_startup"` + AutoCreateCollection bool `yaml:"auto_create_collection"` + VerboseErrors bool `yaml:"verbose_errors"` + } `yaml:"development"` +} + +// MilvusCache provides a scalable semantic cache implementation using Milvus vector database +type MilvusCache struct { + client client.Client + config *MilvusConfig + collectionName string + similarityThreshold float32 + ttlSeconds int + enabled bool + hitCount int64 + missCount int64 + lastCleanupTime *time.Time + mu sync.RWMutex +} + +// MilvusCacheOptions contains configuration parameters for Milvus cache initialization +type MilvusCacheOptions struct { + SimilarityThreshold float32 + TTLSeconds int + Enabled bool + ConfigPath string +} + +// NewMilvusCache initializes a new Milvus-backed semantic cache instance +func NewMilvusCache(options MilvusCacheOptions) (*MilvusCache, error) { + if !options.Enabled { + observability.Debugf("MilvusCache: disabled, returning stub") + return &MilvusCache{ + enabled: false, + }, nil + } + + // Load Milvus configuration from file + observability.Debugf("MilvusCache: loading config from %s", options.ConfigPath) + config, err := loadMilvusConfig(options.ConfigPath) + if err != nil { + observability.Debugf("MilvusCache: failed to load config: %v", err) + return nil, fmt.Errorf("failed to load Milvus config: %w", err) + } + observability.Debugf("MilvusCache: config loaded - host=%s:%d, collection=%s, dimension=auto-detect", + config.Connection.Host, config.Connection.Port, config.Collection.Name) + + // Establish connection to Milvus server + connectionString := fmt.Sprintf("%s:%d", config.Connection.Host, config.Connection.Port) + observability.Debugf("MilvusCache: connecting to Milvus at %s", connectionString) + milvusClient, err := client.NewGrpcClient(context.Background(), connectionString) + if err != nil { + observability.Debugf("MilvusCache: failed to connect: %v", err) + return nil, 
fmt.Errorf("failed to create Milvus client: %w", err) + } + observability.Debugf("MilvusCache: successfully connected to Milvus") + + cache := &MilvusCache{ + client: milvusClient, + config: config, + collectionName: config.Collection.Name, + similarityThreshold: options.SimilarityThreshold, + ttlSeconds: options.TTLSeconds, + enabled: options.Enabled, + } + + // Set up the collection for caching + observability.Debugf("MilvusCache: initializing collection '%s'", config.Collection.Name) + if err := cache.initializeCollection(); err != nil { + observability.Debugf("MilvusCache: failed to initialize collection: %v", err) + milvusClient.Close() + return nil, fmt.Errorf("failed to initialize collection: %w", err) + } + observability.Debugf("MilvusCache: initialization complete") + + return cache, nil +} + +// loadMilvusConfig reads and parses the Milvus configuration from file +func loadMilvusConfig(configPath string) (*MilvusConfig, error) { + if configPath == "" { + return nil, fmt.Errorf("Milvus config path is required") + } + + data, err := os.ReadFile(configPath) + if err != nil { + return nil, fmt.Errorf("failed to read config file: %w", err) + } + + var config MilvusConfig + if err := yaml.Unmarshal(data, &config); err != nil { + return nil, fmt.Errorf("failed to parse config file: %w", err) + } + + return &config, nil +} + +// initializeCollection sets up the Milvus collection and index structures +func (c *MilvusCache) initializeCollection() error { + ctx := context.Background() + + // Verify collection existence + hasCollection, err := c.client.HasCollection(ctx, c.collectionName) + if err != nil { + return fmt.Errorf("failed to check collection existence: %w", err) + } + + // Handle development mode collection reset + if c.config.Development.DropCollectionOnStartup && hasCollection { + if err := c.client.DropCollection(ctx, c.collectionName); err != nil { + observability.Debugf("MilvusCache: failed to drop collection: %v", err) + return fmt.Errorf("failed to drop collection: %w", err) + } + hasCollection = false + observability.Debugf("MilvusCache: dropped existing collection '%s' for development", c.collectionName) + observability.LogEvent("collection_dropped", map[string]interface{}{ + "backend": "milvus", + "collection": c.collectionName, + "reason": "development_mode", + }) + } + + // Create collection if it doesn't exist + if !hasCollection { + if !c.config.Development.AutoCreateCollection { + return fmt.Errorf("collection %s does not exist and auto-creation is disabled", c.collectionName) + } + + if err := c.createCollection(); err != nil { + observability.Debugf("MilvusCache: failed to create collection: %v", err) + return fmt.Errorf("failed to create collection: %w", err) + } + observability.Debugf("MilvusCache: created new collection '%s' with dimension %d", + c.collectionName, c.config.Collection.VectorField.Dimension) + observability.LogEvent("collection_created", map[string]interface{}{ + "backend": "milvus", + "collection": c.collectionName, + "dimension": c.config.Collection.VectorField.Dimension, + }) + } + + // Load collection into memory for queries + observability.Debugf("MilvusCache: loading collection '%s' into memory", c.collectionName) + if err := c.client.LoadCollection(ctx, c.collectionName, false); err != nil { + observability.Debugf("MilvusCache: failed to load collection: %v", err) + return fmt.Errorf("failed to load collection: %w", err) + } + observability.Debugf("MilvusCache: collection loaded successfully") + + return nil +} + +// createCollection 
builds the Milvus collection with the appropriate schema +func (c *MilvusCache) createCollection() error { + ctx := context.Background() + + // Determine embedding dimension automatically + testEmbedding, err := candle_binding.GetEmbedding("test", 0) // Auto-detect + if err != nil { + return fmt.Errorf("failed to detect embedding dimension: %w", err) + } + actualDimension := len(testEmbedding) + + observability.Debugf("MilvusCache.createCollection: auto-detected embedding dimension: %d", actualDimension) + + // Define schema with auto-detected dimension + schema := &entity.Schema{ + CollectionName: c.collectionName, + Description: c.config.Collection.Description, + Fields: []*entity.Field{ + { + Name: "id", + DataType: entity.FieldTypeVarChar, + PrimaryKey: true, + TypeParams: map[string]string{"max_length": "64"}, + }, + { + Name: "model", + DataType: entity.FieldTypeVarChar, + TypeParams: map[string]string{"max_length": "256"}, + }, + { + Name: "query", + DataType: entity.FieldTypeVarChar, + TypeParams: map[string]string{"max_length": "65535"}, + }, + { + Name: "request_body", + DataType: entity.FieldTypeVarChar, + TypeParams: map[string]string{"max_length": "65535"}, + }, + { + Name: "response_body", + DataType: entity.FieldTypeVarChar, + TypeParams: map[string]string{"max_length": "65535"}, + }, + { + Name: c.config.Collection.VectorField.Name, + DataType: entity.FieldTypeFloatVector, + TypeParams: map[string]string{ + "dim": fmt.Sprintf("%d", actualDimension), // Use auto-detected dimension + }, + }, + { + Name: "timestamp", + DataType: entity.FieldTypeInt64, + }, + }, + } + + // Create collection + if err := c.client.CreateCollection(ctx, schema, 1); err != nil { + return err + } + + // Create index + indexParams := map[string]string{ + "index_type": c.config.Collection.Index.Type, + "metric_type": c.config.Collection.VectorField.MetricType, + "params": fmt.Sprintf(`{"M": %d, "efConstruction": %d}`, + c.config.Collection.Index.Params.M, + c.config.Collection.Index.Params.EfConstruction), + } + + observability.Debugf("MilvusCache.createCollection: creating index for %d-dimensional vectors", actualDimension) + + // Create index with updated API + index := entity.NewGenericIndex(c.config.Collection.VectorField.Name, entity.IndexType(c.config.Collection.Index.Type), indexParams) + if err := c.client.CreateIndex(ctx, c.collectionName, c.config.Collection.VectorField.Name, index, false); err != nil { + return err + } + + return nil +} + +// IsEnabled returns the current cache activation status +func (c *MilvusCache) IsEnabled() bool { + return c.enabled +} + +// AddPendingRequest stores a request that is awaiting its response +func (c *MilvusCache) AddPendingRequest(model string, query string, requestBody []byte) (string, error) { + start := time.Now() + + if !c.enabled { + return query, nil + } + + // Store incomplete entry for later completion with response + result, err := c.addEntry(model, query, requestBody, nil) + + if err != nil { + metrics.RecordCacheOperation("milvus", "add_pending", "error", time.Since(start).Seconds()) + } else { + metrics.RecordCacheOperation("milvus", "add_pending", "success", time.Since(start).Seconds()) + } + + return result, err +} + +// UpdateWithResponse completes a pending request by adding the response +func (c *MilvusCache) UpdateWithResponse(query string, responseBody []byte) error { + start := time.Now() + + if !c.enabled { + return nil + } + + queryPreview := query + if len(query) > 50 { + queryPreview = query[:50] + "..." 
+ } + + observability.Debugf("MilvusCache.UpdateWithResponse: updating pending entry (query: %s, response_size: %d)", + queryPreview, len(responseBody)) + + // Find the pending entry and complete it with the response + // Query for the incomplete entry to retrieve its metadata + ctx := context.Background() + queryExpr := fmt.Sprintf("query == \"%s\" && response_body == \"\"", query) + + observability.Debugf("MilvusCache.UpdateWithResponse: searching for pending entry with expr: %s", queryExpr) + + results, err := c.client.Query(ctx, c.collectionName, []string{}, queryExpr, + []string{"model", "request_body"}) + + if err != nil { + observability.Debugf("MilvusCache.UpdateWithResponse: query failed: %v", err) + metrics.RecordCacheOperation("milvus", "update_response", "error", time.Since(start).Seconds()) + return fmt.Errorf("failed to query pending entry: %w", err) + } + + if len(results) == 0 { + observability.Debugf("MilvusCache.UpdateWithResponse: no pending entry found, adding as new complete entry") + // Create new complete entry when no pending entry exists + _, err := c.addEntry("unknown", query, []byte(""), responseBody) + if err != nil { + metrics.RecordCacheOperation("milvus", "update_response", "error", time.Since(start).Seconds()) + } else { + metrics.RecordCacheOperation("milvus", "update_response", "success", time.Since(start).Seconds()) + } + return err + } + + // Get the model and request body from the pending entry + modelColumn := results[0].(*entity.ColumnVarChar) + requestColumn := results[1].(*entity.ColumnVarChar) + + if modelColumn.Len() > 0 { + model := modelColumn.Data()[0] + requestBody := requestColumn.Data()[0] + + observability.Debugf("MilvusCache.UpdateWithResponse: found pending entry, adding complete entry (model: %s)", model) + + // Create the complete entry with response data + _, err := c.addEntry(model, query, []byte(requestBody), responseBody) + if err != nil { + metrics.RecordCacheOperation("milvus", "update_response", "error", time.Since(start).Seconds()) + return fmt.Errorf("failed to add complete entry: %w", err) + } + + observability.Debugf("MilvusCache.UpdateWithResponse: successfully added complete entry with response") + metrics.RecordCacheOperation("milvus", "update_response", "success", time.Since(start).Seconds()) + } + + return nil +} + +// AddEntry stores a complete request-response pair in the cache +func (c *MilvusCache) AddEntry(model string, query string, requestBody, responseBody []byte) error { + start := time.Now() + + if !c.enabled { + return nil + } + + _, err := c.addEntry(model, query, requestBody, responseBody) + + if err != nil { + metrics.RecordCacheOperation("milvus", "add_entry", "error", time.Since(start).Seconds()) + } else { + metrics.RecordCacheOperation("milvus", "add_entry", "success", time.Since(start).Seconds()) + } + + return err +} + +// addEntry handles the internal logic for storing entries in Milvus +func (c *MilvusCache) addEntry(model string, query string, requestBody, responseBody []byte) (string, error) { + // Generate semantic embedding for the query + embedding, err := candle_binding.GetEmbedding(query, 0) // Auto-detect dimension + if err != nil { + return "", fmt.Errorf("failed to generate embedding: %w", err) + } + + // Generate unique ID + id := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("%s_%s_%d", model, query, time.Now().UnixNano())))) + + ctx := context.Background() + + // Prepare data for insertion + ids := []string{id} + models := []string{model} + queries := []string{query} + requestBodies := 
[]string{string(requestBody)} + responseBodies := []string{string(responseBody)} + embeddings := [][]float32{embedding} + timestamps := []int64{time.Now().Unix()} + + // Create columns + idColumn := entity.NewColumnVarChar("id", ids) + modelColumn := entity.NewColumnVarChar("model", models) + queryColumn := entity.NewColumnVarChar("query", queries) + requestColumn := entity.NewColumnVarChar("request_body", requestBodies) + responseColumn := entity.NewColumnVarChar("response_body", responseBodies) + embeddingColumn := entity.NewColumnFloatVector(c.config.Collection.VectorField.Name, len(embedding), embeddings) + timestampColumn := entity.NewColumnInt64("timestamp", timestamps) + + // Insert the entry into the collection + observability.Debugf("MilvusCache.addEntry: inserting entry into collection '%s' (embedding_dim: %d, request_size: %d, response_size: %d)", + c.collectionName, len(embedding), len(requestBody), len(responseBody)) + _, err = c.client.Insert(ctx, c.collectionName, "", idColumn, modelColumn, queryColumn, requestColumn, responseColumn, embeddingColumn, timestampColumn) + if err != nil { + observability.Debugf("MilvusCache.addEntry: insert failed: %v", err) + return "", fmt.Errorf("failed to insert cache entry: %w", err) + } + + // Ensure data is persisted to storage + if err := c.client.Flush(ctx, c.collectionName, false); err != nil { + observability.Warnf("Failed to flush cache entry: %v", err) + } + + observability.Debugf("MilvusCache.addEntry: successfully added entry to Milvus") + observability.LogEvent("cache_entry_added", map[string]interface{}{ + "backend": "milvus", + "collection": c.collectionName, + "query": query, + "model": model, + "embedding_dimension": len(embedding), + }) + return query, nil +} + +// FindSimilar searches for semantically similar cached requests +func (c *MilvusCache) FindSimilar(model string, query string) ([]byte, bool, error) { + start := time.Now() + + if !c.enabled { + observability.Debugf("MilvusCache.FindSimilar: cache disabled") + return nil, false, nil + } + queryPreview := query + if len(query) > 50 { + queryPreview = query[:50] + "..." 
+ } + observability.Debugf("MilvusCache.FindSimilar: searching for model='%s', query='%s' (len=%d chars)", + model, queryPreview, len(query)) + + // Generate semantic embedding for similarity comparison + queryEmbedding, err := candle_binding.GetEmbedding(query, 0) // Auto-detect dimension + if err != nil { + metrics.RecordCacheOperation("milvus", "find_similar", "error", time.Since(start).Seconds()) + return nil, false, fmt.Errorf("failed to generate embedding: %w", err) + } + + ctx := context.Background() + + // Query for completed entries with the same model + // Using Query approach for comprehensive similarity search + queryExpr := fmt.Sprintf("model == \"%s\" && response_body != \"\"", model) + observability.Debugf("MilvusCache.FindSimilar: querying with expr: %s (embedding_dim: %d)", + queryExpr, len(queryEmbedding)) + + // Use Query to get all matching entries, then compute similarity manually + results, err := c.client.Query(ctx, c.collectionName, []string{}, queryExpr, + []string{"query", "response_body", c.config.Collection.VectorField.Name}) + + if err != nil { + observability.Debugf("MilvusCache.FindSimilar: query failed: %v", err) + atomic.AddInt64(&c.missCount, 1) + metrics.RecordCacheOperation("milvus", "find_similar", "error", time.Since(start).Seconds()) + metrics.RecordCacheMiss() + return nil, false, nil + } + + if len(results) == 0 { + atomic.AddInt64(&c.missCount, 1) + observability.Debugf("MilvusCache.FindSimilar: no entries found with responses") + metrics.RecordCacheOperation("milvus", "find_similar", "miss", time.Since(start).Seconds()) + metrics.RecordCacheMiss() + return nil, false, nil + } + + // Calculate semantic similarity for each candidate + bestSimilarity := float32(-1.0) + var bestResponse string + + // Find columns by type instead of assuming order + var queryColumn *entity.ColumnVarChar + var responseColumn *entity.ColumnVarChar + var embeddingColumn *entity.ColumnFloatVector + + for _, col := range results { + switch typedCol := col.(type) { + case *entity.ColumnVarChar: + if typedCol.Name() == "query" { + queryColumn = typedCol + } else if typedCol.Name() == "response_body" { + responseColumn = typedCol + } + case *entity.ColumnFloatVector: + if typedCol.Name() == c.config.Collection.VectorField.Name { + embeddingColumn = typedCol + } + } + } + + if queryColumn == nil || responseColumn == nil || embeddingColumn == nil { + observability.Debugf("MilvusCache.FindSimilar: missing required columns in results") + atomic.AddInt64(&c.missCount, 1) + metrics.RecordCacheOperation("milvus", "find_similar", "error", time.Since(start).Seconds()) + metrics.RecordCacheMiss() + return nil, false, nil + } + + for i := 0; i < queryColumn.Len(); i++ { + storedEmbedding := embeddingColumn.Data()[i] + + // Calculate dot product similarity score + var similarity float32 + for j := 0; j < len(queryEmbedding) && j < len(storedEmbedding); j++ { + similarity += queryEmbedding[j] * storedEmbedding[j] + } + + if similarity > bestSimilarity { + bestSimilarity = similarity + bestResponse = responseColumn.Data()[i] + } + } + + observability.Debugf("MilvusCache.FindSimilar: best similarity=%.4f, threshold=%.4f (checked %d entries)", + bestSimilarity, c.similarityThreshold, queryColumn.Len()) + + if bestSimilarity >= c.similarityThreshold { + atomic.AddInt64(&c.hitCount, 1) + observability.Debugf("MilvusCache.FindSimilar: CACHE HIT - similarity=%.4f >= threshold=%.4f, response_size=%d bytes", + bestSimilarity, c.similarityThreshold, len(bestResponse)) + 
observability.LogEvent("cache_hit", map[string]interface{}{ + "backend": "milvus", + "similarity": bestSimilarity, + "threshold": c.similarityThreshold, + "model": model, + "collection": c.collectionName, + }) + metrics.RecordCacheOperation("milvus", "find_similar", "hit", time.Since(start).Seconds()) + metrics.RecordCacheHit() + return []byte(bestResponse), true, nil + } + + atomic.AddInt64(&c.missCount, 1) + observability.Debugf("MilvusCache.FindSimilar: CACHE MISS - best_similarity=%.4f < threshold=%.4f", + bestSimilarity, c.similarityThreshold) + observability.LogEvent("cache_miss", map[string]interface{}{ + "backend": "milvus", + "best_similarity": bestSimilarity, + "threshold": c.similarityThreshold, + "model": model, + "collection": c.collectionName, + "entries_checked": queryColumn.Len(), + }) + metrics.RecordCacheOperation("milvus", "find_similar", "miss", time.Since(start).Seconds()) + metrics.RecordCacheMiss() + return nil, false, nil +} + +// Close releases all resources held by the cache +func (c *MilvusCache) Close() error { + if c.client != nil { + return c.client.Close() + } + return nil +} + +// GetStats provides current cache performance metrics +func (c *MilvusCache) GetStats() CacheStats { + c.mu.RLock() + defer c.mu.RUnlock() + + hits := atomic.LoadInt64(&c.hitCount) + misses := atomic.LoadInt64(&c.missCount) + total := hits + misses + + var hitRatio float64 + if total > 0 { + hitRatio = float64(hits) / float64(total) + } + + // Retrieve collection statistics from Milvus + totalEntries := 0 + if c.enabled && c.client != nil { + ctx := context.Background() + stats, err := c.client.GetCollectionStatistics(ctx, c.collectionName) + if err == nil { + // Extract entity count from statistics + if entityCount, ok := stats["row_count"]; ok { + fmt.Sscanf(entityCount, "%d", &totalEntries) + observability.Debugf("MilvusCache.GetStats: collection '%s' contains %d entries", + c.collectionName, totalEntries) + } + } else { + observability.Debugf("MilvusCache.GetStats: failed to get collection stats: %v", err) + } + } + + cacheStats := CacheStats{ + TotalEntries: totalEntries, + HitCount: hits, + MissCount: misses, + HitRatio: hitRatio, + } + + if c.lastCleanupTime != nil { + cacheStats.LastCleanupTime = c.lastCleanupTime + } + + return cacheStats +} diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go index fe1318e1..7a1441f3 100644 --- a/src/semantic-router/pkg/config/config.go +++ b/src/semantic-router/pkg/config/config.go @@ -49,7 +49,26 @@ type RouterConfig struct { ReasoningFamilies map[string]ReasoningFamilyConfig `yaml:"reasoning_families,omitempty"` // Semantic cache configuration - SemanticCache SemanticCacheConfig `yaml:"semantic_cache"` + SemanticCache struct { + // Type of cache backend to use + BackendType string `yaml:"backend_type,omitempty"` + + // Enable semantic caching + Enabled bool `yaml:"enabled"` + + // Similarity threshold for cache hits (0.0-1.0) + // If not specified, will use the BertModel.Threshold + SimilarityThreshold *float32 `yaml:"similarity_threshold,omitempty"` + + // Maximum number of cache entries to keep (applies to in-memory cache) + MaxEntries int `yaml:"max_entries,omitempty"` + + // Time-to-live for cache entries in seconds (0 means no expiration) + TTLSeconds int `yaml:"ttl_seconds,omitempty"` + + // Path to backend-specific configuration file + BackendConfigPath string `yaml:"backend_config_path,omitempty"` + } `yaml:"semantic_cache"` // Prompt guard configuration PromptGuard PromptGuardConfig 
`yaml:"prompt_guard"` @@ -67,22 +86,6 @@ type RouterConfig struct { API APIConfig `yaml:"api"` } -// SemanticCacheConfig represents configuration for the semantic cache -type SemanticCacheConfig struct { - // Enable semantic caching - Enabled bool `yaml:"enabled"` - - // Similarity threshold for cache hits (0.0-1.0) - // If not specified, will use the BertModel.Threshold - SimilarityThreshold *float32 `yaml:"similarity_threshold,omitempty"` - - // Maximum number of cache entries to keep - MaxEntries int `yaml:"max_entries,omitempty"` - - // Time-to-live for cache entries in seconds (0 means no expiration) - TTLSeconds int `yaml:"ttl_seconds,omitempty"` -} - // APIConfig represents configuration for API endpoints type APIConfig struct { // Batch classification configuration diff --git a/src/semantic-router/pkg/config/config_test.go b/src/semantic-router/pkg/config/config_test.go index 464f0cc7..cc83f8c1 100644 --- a/src/semantic-router/pkg/config/config_test.go +++ b/src/semantic-router/pkg/config/config_test.go @@ -146,12 +146,16 @@ tools: // Verify default model Expect(cfg.DefaultModel).To(Equal("model-b")) - // Verify semantic cache + // Verify semantic cache (legacy fields) Expect(cfg.SemanticCache.Enabled).To(BeTrue()) Expect(*cfg.SemanticCache.SimilarityThreshold).To(Equal(float32(0.9))) Expect(cfg.SemanticCache.MaxEntries).To(Equal(1000)) Expect(cfg.SemanticCache.TTLSeconds).To(Equal(3600)) + // New fields should have default/zero values when not specified + Expect(cfg.SemanticCache.BackendType).To(BeEmpty()) + Expect(cfg.SemanticCache.BackendConfigPath).To(BeEmpty()) + // Verify prompt guard Expect(cfg.PromptGuard.Enabled).To(BeTrue()) Expect(cfg.PromptGuard.ModelID).To(Equal("test-jailbreak-model")) @@ -968,6 +972,311 @@ default_model: "missing-default-model" }) }) + Describe("Semantic Cache Backend Configuration", func() { + Context("with memory backend configuration", func() { + BeforeEach(func() { + configContent := ` +semantic_cache: + enabled: true + backend_type: "memory" + similarity_threshold: 0.85 + max_entries: 2000 + ttl_seconds: 1800 +` + err := os.WriteFile(configFile, []byte(configContent), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should parse memory backend configuration correctly", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + Expect(cfg.SemanticCache.Enabled).To(BeTrue()) + Expect(cfg.SemanticCache.BackendType).To(Equal("memory")) + Expect(*cfg.SemanticCache.SimilarityThreshold).To(Equal(float32(0.85))) + Expect(cfg.SemanticCache.MaxEntries).To(Equal(2000)) + Expect(cfg.SemanticCache.TTLSeconds).To(Equal(1800)) + Expect(cfg.SemanticCache.BackendConfigPath).To(BeEmpty()) + }) + }) + + Context("with milvus backend configuration", func() { + BeforeEach(func() { + configContent := ` +semantic_cache: + enabled: true + backend_type: "milvus" + similarity_threshold: 0.9 + ttl_seconds: 7200 + backend_config_path: "config/cache/milvus.yaml" +` + err := os.WriteFile(configFile, []byte(configContent), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should parse milvus backend configuration correctly", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + Expect(cfg.SemanticCache.Enabled).To(BeTrue()) + Expect(cfg.SemanticCache.BackendType).To(Equal("milvus")) + Expect(*cfg.SemanticCache.SimilarityThreshold).To(Equal(float32(0.9))) + Expect(cfg.SemanticCache.TTLSeconds).To(Equal(7200)) + Expect(cfg.SemanticCache.BackendConfigPath).To(Equal("config/cache/milvus.yaml")) + 
+ // MaxEntries should be ignored for Milvus backend + Expect(cfg.SemanticCache.MaxEntries).To(Equal(0)) + }) + }) + + Context("with disabled cache", func() { + BeforeEach(func() { + configContent := ` +semantic_cache: + enabled: false + backend_type: "memory" + similarity_threshold: 0.8 + max_entries: 1000 + ttl_seconds: 3600 +` + err := os.WriteFile(configFile, []byte(configContent), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should preserve configuration even when cache is disabled", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + Expect(cfg.SemanticCache.Enabled).To(BeFalse()) + Expect(cfg.SemanticCache.BackendType).To(Equal("memory")) + Expect(*cfg.SemanticCache.SimilarityThreshold).To(Equal(float32(0.8))) + }) + }) + + Context("with minimal configuration", func() { + BeforeEach(func() { + configContent := ` +semantic_cache: + enabled: true +` + err := os.WriteFile(configFile, []byte(configContent), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should handle minimal configuration with default values", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + Expect(cfg.SemanticCache.Enabled).To(BeTrue()) + Expect(cfg.SemanticCache.BackendType).To(BeEmpty()) // Should default to empty (memory) + Expect(cfg.SemanticCache.SimilarityThreshold).To(BeNil()) // Will fallback to BERT threshold + Expect(cfg.SemanticCache.MaxEntries).To(Equal(0)) + Expect(cfg.SemanticCache.TTLSeconds).To(Equal(0)) + Expect(cfg.SemanticCache.BackendConfigPath).To(BeEmpty()) + }) + }) + + Context("with comprehensive configuration", func() { + BeforeEach(func() { + configContent := ` +bert_model: + threshold: 0.7 + +semantic_cache: + enabled: true + backend_type: "milvus" + similarity_threshold: 0.95 + ttl_seconds: 14400 + backend_config_path: "config/cache/production_milvus.yaml" +` + err := os.WriteFile(configFile, []byte(configContent), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should parse all semantic cache fields correctly", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + Expect(cfg.SemanticCache.Enabled).To(BeTrue()) + Expect(cfg.SemanticCache.BackendType).To(Equal("milvus")) + Expect(*cfg.SemanticCache.SimilarityThreshold).To(Equal(float32(0.95))) + Expect(cfg.SemanticCache.TTLSeconds).To(Equal(14400)) + Expect(cfg.SemanticCache.BackendConfigPath).To(Equal("config/cache/production_milvus.yaml")) + + // Verify threshold resolution + threshold := cfg.GetCacheSimilarityThreshold() + Expect(threshold).To(Equal(float32(0.95))) // Should use cache threshold, not BERT + }) + }) + + Context("threshold fallback behavior", func() { + BeforeEach(func() { + configContent := ` +bert_model: + threshold: 0.75 + +semantic_cache: + enabled: true + backend_type: "memory" + max_entries: 500 + # No similarity_threshold specified +` + err := os.WriteFile(configFile, []byte(configContent), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should fall back to BERT threshold when cache threshold not specified", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + Expect(cfg.SemanticCache.SimilarityThreshold).To(BeNil()) + + // GetCacheSimilarityThreshold should return BERT threshold + threshold := cfg.GetCacheSimilarityThreshold() + Expect(threshold).To(Equal(float32(0.75))) + }) + }) + + Context("with edge case values", func() { + BeforeEach(func() { + configContent := ` +semantic_cache: + enabled: true + backend_type: 
"memory" + similarity_threshold: 1.0 + max_entries: 0 + ttl_seconds: -1 + backend_config_path: "" +` + err := os.WriteFile(configFile, []byte(configContent), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should handle edge case values correctly", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + Expect(cfg.SemanticCache.Enabled).To(BeTrue()) + Expect(cfg.SemanticCache.BackendType).To(Equal("memory")) + Expect(*cfg.SemanticCache.SimilarityThreshold).To(Equal(float32(1.0))) + Expect(cfg.SemanticCache.MaxEntries).To(Equal(0)) + Expect(cfg.SemanticCache.TTLSeconds).To(Equal(-1)) + Expect(cfg.SemanticCache.BackendConfigPath).To(BeEmpty()) + }) + }) + + Context("with unsupported backend type", func() { + BeforeEach(func() { + configContent := ` +semantic_cache: + enabled: true + backend_type: "redis" + similarity_threshold: 0.8 +` + err := os.WriteFile(configFile, []byte(configContent), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should parse unsupported backend type without error (validation happens at runtime)", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + // Configuration parsing should succeed + Expect(cfg.SemanticCache.Enabled).To(BeTrue()) + Expect(cfg.SemanticCache.BackendType).To(Equal("redis")) + + // Runtime validation will catch unsupported backend types + }) + }) + + Context("with production-like configuration", func() { + BeforeEach(func() { + configContent := ` +bert_model: + model_id: sentence-transformers/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: false + +semantic_cache: + enabled: true + backend_type: "milvus" + similarity_threshold: 0.85 + ttl_seconds: 86400 # 24 hours + backend_config_path: "config/cache/milvus.yaml" + +categories: + - name: "production" + description: "Production workload" + model_scores: + - model: "gpt-4" + score: 0.95 + +default_model: "gpt-4" +` + err := os.WriteFile(configFile, []byte(configContent), 0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should handle production-like configuration correctly", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + // Verify BERT config + Expect(cfg.BertModel.ModelID).To(Equal("sentence-transformers/all-MiniLM-L12-v2")) + Expect(cfg.BertModel.Threshold).To(Equal(float32(0.6))) + Expect(cfg.BertModel.UseCPU).To(BeFalse()) + + // Verify semantic cache config + Expect(cfg.SemanticCache.Enabled).To(BeTrue()) + Expect(cfg.SemanticCache.BackendType).To(Equal("milvus")) + Expect(*cfg.SemanticCache.SimilarityThreshold).To(Equal(float32(0.85))) + Expect(cfg.SemanticCache.TTLSeconds).To(Equal(86400)) + Expect(cfg.SemanticCache.BackendConfigPath).To(Equal("config/cache/milvus.yaml")) + + // Verify threshold resolution + threshold := cfg.GetCacheSimilarityThreshold() + Expect(threshold).To(Equal(float32(0.85))) // Should use cache threshold + + // Verify other config is still working + Expect(cfg.DefaultModel).To(Equal("gpt-4")) + Expect(cfg.Categories).To(HaveLen(1)) + }) + }) + + Context("with multiple backend configurations in comments", func() { + BeforeEach(func() { + configContent := ` +semantic_cache: + # Development configuration + enabled: true + backend_type: "memory" + similarity_threshold: 0.8 + max_entries: 1000 + ttl_seconds: 3600 + + # Production configuration (commented out) + # backend_type: "milvus" + # backend_config_path: "config/cache/milvus.yaml" + # max_entries is ignored for Milvus +` + err := os.WriteFile(configFile, []byte(configContent), 
0o644) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should parse active configuration and ignore commented alternatives", func() { + cfg, err := config.LoadConfig(configFile) + Expect(err).NotTo(HaveOccurred()) + + Expect(cfg.SemanticCache.Enabled).To(BeTrue()) + Expect(cfg.SemanticCache.BackendType).To(Equal("memory")) + Expect(*cfg.SemanticCache.SimilarityThreshold).To(Equal(float32(0.8))) + Expect(cfg.SemanticCache.MaxEntries).To(Equal(1000)) + Expect(cfg.SemanticCache.TTLSeconds).To(Equal(3600)) + Expect(cfg.SemanticCache.BackendConfigPath).To(BeEmpty()) // Comments are ignored + }) + }) + }) + Describe("PII Constants", func() { It("should have all expected PII type constants defined", func() { expectedPIITypes := []string{ diff --git a/src/semantic-router/pkg/extproc/caching_test.go b/src/semantic-router/pkg/extproc/caching_test.go index b0f3f6bd..2be6da39 100644 --- a/src/semantic-router/pkg/extproc/caching_test.go +++ b/src/semantic-router/pkg/extproc/caching_test.go @@ -29,13 +29,16 @@ var _ = Describe("Caching Functionality", func() { Expect(err).NotTo(HaveOccurred()) // Override cache with enabled configuration - cacheOptions := cache.SemanticCacheOptions{ + cacheConfig := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, Enabled: true, SimilarityThreshold: 0.9, MaxEntries: 100, TTLSeconds: 3600, } - router.Cache = cache.NewSemanticCache(cacheOptions) + cacheBackend, err := cache.NewCacheBackend(cacheConfig) + Expect(err).NotTo(HaveOccurred()) + router.Cache = cacheBackend }) It("should handle cache miss scenario", func() { @@ -207,13 +210,16 @@ var _ = Describe("Caching Functionality", func() { Context("with cache disabled", func() { BeforeEach(func() { cfg.SemanticCache.Enabled = false - cacheOptions := cache.SemanticCacheOptions{ + cacheConfig := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, Enabled: false, SimilarityThreshold: 0.9, MaxEntries: 100, TTLSeconds: 3600, } - router.Cache = cache.NewSemanticCache(cacheOptions) + cacheBackend, err := cache.NewCacheBackend(cacheConfig) + Expect(err).NotTo(HaveOccurred()) + router.Cache = cacheBackend }) It("should process requests normally without caching", func() { diff --git a/src/semantic-router/pkg/extproc/router.go b/src/semantic-router/pkg/extproc/router.go index d18b7d10..b3e3cb6b 100644 --- a/src/semantic-router/pkg/extproc/router.go +++ b/src/semantic-router/pkg/extproc/router.go @@ -27,7 +27,7 @@ type OpenAIRouter struct { CategoryDescriptions []string Classifier *classification.Classifier PIIChecker *pii.PolicyChecker - Cache *cache.SemanticCache + Cache cache.CacheBackend ToolsDatabase *tools.ToolsDatabase // Map to track pending requests and their unique IDs @@ -92,17 +92,31 @@ func NewOpenAIRouter(configPath string) (*OpenAIRouter, error) { log.Printf("Category descriptions: %v", categoryDescriptions) // Create semantic cache with config options - cacheOptions := cache.SemanticCacheOptions{ + cacheConfig := cache.CacheConfig{ + BackendType: cache.CacheBackendType(cfg.SemanticCache.BackendType), + Enabled: cfg.SemanticCache.Enabled, SimilarityThreshold: cfg.GetCacheSimilarityThreshold(), MaxEntries: cfg.SemanticCache.MaxEntries, TTLSeconds: cfg.SemanticCache.TTLSeconds, - Enabled: cfg.SemanticCache.Enabled, + BackendConfigPath: cfg.SemanticCache.BackendConfigPath, + } + + // Use default backend type if not specified + if cacheConfig.BackendType == "" { + cacheConfig.BackendType = cache.InMemoryCacheType + } + + semanticCache, err := cache.NewCacheBackend(cacheConfig) + if err != nil { + return 
nil, fmt.Errorf("failed to create semantic cache: %w", err) } - semanticCache := cache.NewSemanticCache(cacheOptions) if semanticCache.IsEnabled() { - log.Printf("Semantic cache enabled with threshold: %.4f, max entries: %d, TTL: %d seconds", - cacheOptions.SimilarityThreshold, cacheOptions.MaxEntries, cacheOptions.TTLSeconds) + log.Printf("Semantic cache enabled (backend: %s) with threshold: %.4f, TTL: %d seconds", + cacheConfig.BackendType, cacheConfig.SimilarityThreshold, cacheConfig.TTLSeconds) + if cacheConfig.BackendType == cache.InMemoryCacheType { + log.Printf("In-memory cache max entries: %d", cacheConfig.MaxEntries) + } } else { log.Println("Semantic cache is disabled") } diff --git a/src/semantic-router/pkg/extproc/test_utils_test.go b/src/semantic-router/pkg/extproc/test_utils_test.go index 5ca4ca3b..218b10d5 100644 --- a/src/semantic-router/pkg/extproc/test_utils_test.go +++ b/src/semantic-router/pkg/extproc/test_utils_test.go @@ -130,7 +130,15 @@ func CreateTestConfig() *config.RouterConfig { }, }, DefaultModel: "model-b", - SemanticCache: config.SemanticCacheConfig{ + SemanticCache: struct { + BackendType string `yaml:"backend_type,omitempty"` + Enabled bool `yaml:"enabled"` + SimilarityThreshold *float32 `yaml:"similarity_threshold,omitempty"` + MaxEntries int `yaml:"max_entries,omitempty"` + TTLSeconds int `yaml:"ttl_seconds,omitempty"` + BackendConfigPath string `yaml:"backend_config_path,omitempty"` + }{ + BackendType: "memory", Enabled: false, // Disable for most tests SimilarityThreshold: &[]float32{0.9}[0], MaxEntries: 100, @@ -202,13 +210,17 @@ func CreateTestRouter(cfg *config.RouterConfig) (*extproc.OpenAIRouter, error) { } // Create semantic cache - cacheOptions := cache.SemanticCacheOptions{ + cacheConfig := cache.CacheConfig{ + BackendType: cache.InMemoryCacheType, + Enabled: cfg.SemanticCache.Enabled, SimilarityThreshold: cfg.GetCacheSimilarityThreshold(), MaxEntries: cfg.SemanticCache.MaxEntries, TTLSeconds: cfg.SemanticCache.TTLSeconds, - Enabled: cfg.SemanticCache.Enabled, } - semanticCache := cache.NewSemanticCache(cacheOptions) + semanticCache, err := cache.NewCacheBackend(cacheConfig) + if err != nil { + return nil, err + } // Create tools database toolsOptions := tools.ToolsDatabaseOptions{ diff --git a/src/semantic-router/pkg/metrics/metrics.go b/src/semantic-router/pkg/metrics/metrics.go index bc465848..354b13f0 100644 --- a/src/semantic-router/pkg/metrics/metrics.go +++ b/src/semantic-router/pkg/metrics/metrics.go @@ -193,6 +193,42 @@ var ( }, ) + // CacheMisses tracks cache misses + CacheMisses = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "llm_cache_misses_total", + Help: "The total number of cache misses", + }, + ) + + // CacheOperationDuration tracks the duration of cache operations by backend and operation type + CacheOperationDuration = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "llm_cache_operation_duration_seconds", + Help: "The duration of cache operations in seconds", + Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10}, + }, + []string{"backend", "operation"}, + ) + + // CacheOperationTotal tracks the total number of cache operations by backend and operation type + CacheOperationTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "llm_cache_operations_total", + Help: "The total number of cache operations", + }, + []string{"backend", "operation", "status"}, + ) + + // CacheEntriesTotal tracks the total number of entries in the cache by backend + CacheEntriesTotal = 
promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "llm_cache_entries_total", + Help: "The total number of entries in the cache", + }, + []string{"backend"}, + ) + // CategoryClassifications tracks the number of times each category is classified CategoryClassifications = promauto.NewGaugeVec( prometheus.GaugeOpts{ @@ -302,6 +338,22 @@ func RecordCacheHit() { CacheHits.Inc() } +// RecordCacheMiss records a cache miss +func RecordCacheMiss() { + CacheMisses.Inc() +} + +// RecordCacheOperation records a cache operation with duration and status +func RecordCacheOperation(backend, operation, status string, duration float64) { + CacheOperationDuration.WithLabelValues(backend, operation).Observe(duration) + CacheOperationTotal.WithLabelValues(backend, operation, status).Inc() +} + +// UpdateCacheEntries updates the current number of cache entries for a backend +func UpdateCacheEntries(backend string, count int) { + CacheEntriesTotal.WithLabelValues(backend).Set(float64(count)) +} + // RecordCategoryClassification increments the gauge for a specific category classification func RecordCategoryClassification(category string) { CategoryClassifications.WithLabelValues(category).Inc()
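
A minimal usage sketch of the cache backend factory and interface introduced in this patch (CacheConfig, NewCacheBackend, AddEntry, FindSimilar, GetStats). It assumes the candle-binding embedding model has already been initialized and that the cache package is importable at the module path used elsewhere in the diff; the values shown are illustrative, not part of the change itself.

package main

import (
	"fmt"
	"log"

	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
)

func main() {
	// Construct the in-memory backend through the new factory.
	// For Milvus, BackendType would be "milvus" with BackendConfigPath pointing
	// at config/cache/milvus.yaml (MaxEntries is ignored for that backend).
	cfg := cache.CacheConfig{
		BackendType:         cache.InMemoryCacheType,
		Enabled:             true,
		SimilarityThreshold: 0.85,
		MaxEntries:          1000, // in-memory backend only
		TTLSeconds:          3600,
	}

	backend, err := cache.NewCacheBackend(cfg)
	if err != nil {
		log.Fatalf("failed to create cache backend: %v", err)
	}
	defer backend.Close()

	// Store a completed request/response pair.
	err = backend.AddEntry("model-a", "What is AI?",
		[]byte(`{"model":"model-a"}`), []byte(`{"response":"AI info"}`))
	if err != nil {
		log.Fatalf("add entry: %v", err)
	}

	// Look up a semantically similar query for the same model.
	resp, found, err := backend.FindSimilar("model-a", "What is artificial intelligence?")
	if err != nil {
		log.Fatalf("find similar: %v", err)
	}
	if found {
		fmt.Printf("cache hit: %s\n", resp)
	}

	// Inspect hit/miss counters exposed by the backend.
	stats := backend.GetStats()
	fmt.Printf("entries=%d hits=%d misses=%d ratio=%.2f\n",
		stats.TotalEntries, stats.HitCount, stats.MissCount, stats.HitRatio)
}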