Skip to content

Commit 9efcc06

Browse files
committed
feat: add Milvus vector database backend for semantic cache
- Implement MilvusCache backend with persistent storage - Add Milvus configuration file and connection management - Support vector similarity search with configurable indexing - Add TTL support and collection lifecycle management - Include Milvus dependencies and build configuration Addresses #95 Signed-off-by: Huamin Chen <[email protected]>
1 parent 11a7824 commit 9efcc06

File tree

10 files changed

+1971
-643
lines changed

10 files changed

+1971
-643
lines changed

Makefile

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,66 @@
1-
.PHONY: all build clean test docker-build podman-build docker-run podman-run
1+
.PHONY: all build clean test docker-build podman-build docker-run podman-run start-milvus stop-milvus restart-milvus milvus-status clean-milvus test-milvus-cache test-semantic-router-milvus help
22

33
# Default target
44
all: build
55

6+
# Help target
7+
help:
8+
@echo "Available targets:"
9+
@echo " Build targets:"
10+
@echo " all - Build everything (default)"
11+
@echo " build - Build Rust library and Go router"
12+
@echo " rust - Build only the Rust library"
13+
@echo " build-router - Build only the Go router"
14+
@echo " clean - Clean build artifacts"
15+
@echo ""
16+
@echo " Run targets:"
17+
@echo " run-router - Run the router (CONFIG_FILE=config/config.yaml)"
18+
@echo " run-envoy - Run Envoy proxy"
19+
@echo ""
20+
@echo " Test targets:"
21+
@echo " test - Run all tests"
22+
@echo " test-binding - Test candle-binding"
23+
@echo " test-semantic-router - Test semantic router"
24+
@echo " test-category-classifier - Test category classifier"
25+
@echo " test-pii-classifier - Test PII classifier"
26+
@echo " test-jailbreak-classifier - Test jailbreak classifier"
27+
@echo ""
28+
@echo " Milvus targets (CONTAINER_RUNTIME=docker|podman):"
29+
@echo " start-milvus - Start Milvus container for testing"
30+
@echo " stop-milvus - Stop and remove Milvus container"
31+
@echo " restart-milvus - Restart Milvus container"
32+
@echo " milvus-status - Check Milvus container status"
33+
@echo " clean-milvus - Stop container and clean data"
34+
@echo " test-milvus-cache - Test cache with Milvus backend"
35+
@echo " test-semantic-router-milvus - Test router with Milvus cache"
36+
@echo " Example: CONTAINER_RUNTIME=podman make start-milvus"
37+
@echo ""
38+
@echo " Demo targets:"
39+
@echo " test-auto-prompt-reasoning - Test reasoning mode"
40+
@echo " test-auto-prompt-no-reasoning - Test normal mode"
41+
@echo " test-pii - Test PII detection"
42+
@echo " test-prompt-guard - Test jailbreak detection"
43+
@echo " test-tools - Test tool auto-selection"
44+
@echo ""
45+
@echo " Documentation targets:"
46+
@echo " docs-dev - Start documentation dev server"
47+
@echo " docs-build - Build documentation"
48+
@echo " docs-serve - Serve built documentation"
49+
@echo " docs-clean - Clean documentation artifacts"
50+
@echo ""
51+
@echo " Environment variables:"
52+
@echo " CONTAINER_RUNTIME - Container runtime (docker|podman, default: docker)"
53+
@echo " CONFIG_FILE - Config file path (default: config/config.yaml)"
54+
@echo " VLLM_ENDPOINT - vLLM endpoint URL for testing"
55+
@echo ""
56+
@echo " Usage examples:"
57+
@echo " make start-milvus # Use Docker (default)"
58+
@echo " CONTAINER_RUNTIME=podman make start-milvus # Use Podman"
59+
@echo " CONFIG_FILE=custom.yaml make run-router # Use custom config"
60+
61+
# Container runtime (docker or podman)
62+
CONTAINER_RUNTIME ?= docker
63+
664
# vLLM env var
765
VLLM_ENDPOINT ?=
866

@@ -30,7 +88,7 @@ rust:
3088
build-router: rust
3189
@echo "Building router..."
3290
@mkdir -p bin
33-
@cd src/semantic-router && go build -o ../../bin/router cmd/main.go
91+
@cd src/semantic-router && go build --tags=milvus -o ../../bin/router cmd/main.go
3492

3593
# Config file path with default
3694
CONFIG_FILE ?= config/config.yaml
@@ -195,6 +253,65 @@ download-models:
195253
hf download LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model --local-dir models/pii_classifier_modernbert-base_presidio_token_model; \
196254
fi
197255

256+
# Milvus container management
257+
# Start the Milvus standalone container used for testing.
#
# The target is idempotent, so it can safely be a prerequisite of the test
# targets and be invoked repeatedly:
#   - container already running  -> nothing to do
#   - container exists, stopped  -> start it again
#   - container does not exist   -> create it with `run -d`
# Data is bind-mounted from /tmp/milvus-data on the host.
# The fixed 15 second pause is only taken when the container was (re)started;
# it is a grace period, not a readiness check.
start-milvus:
	@echo "Starting Milvus container for testing with $(CONTAINER_RUNTIME)..."
	@mkdir -p /tmp/milvus-data
	@if $(CONTAINER_RUNTIME) ps --format '{{.Names}}' | grep -qx milvus-semantic-cache; then \
		echo "Milvus container is already running"; \
	else \
		if $(CONTAINER_RUNTIME) ps -a --format '{{.Names}}' | grep -qx milvus-semantic-cache; then \
			$(CONTAINER_RUNTIME) start milvus-semantic-cache; \
		else \
			$(CONTAINER_RUNTIME) run -d \
				--name milvus-semantic-cache \
				--security-opt seccomp:unconfined \
				-e ETCD_USE_EMBED=true \
				-e ETCD_DATA_DIR=/var/lib/milvus/etcd \
				-e ETCD_CONFIG_PATH=/milvus/configs/advanced/etcd.yaml \
				-e COMMON_STORAGETYPE=local \
				-e CLUSTER_ENABLED=false \
				-p 19530:19530 \
				-p 9091:9091 \
				-v /tmp/milvus-data:/var/lib/milvus \
				milvusdb/milvus:v2.3.3 \
				milvus run standalone; \
		fi && \
		echo "Waiting for Milvus to be ready..." && \
		sleep 15; \
	fi
	@echo "Milvus should be available at localhost:19530"
276+
277+
# Stop and remove the Milvus test container, then delete its host data
# directory (/tmp/milvus-data). Every step is best-effort: a missing
# container or a failed removal does not fail the target.
stop-milvus:
	@echo "Stopping Milvus container..."
	@for action in stop rm; do \
		$(CONTAINER_RUNTIME) $$action milvus-semantic-cache || true; \
	done
	@sudo rm -rf /tmp/milvus-data || true
	@echo "Milvus container stopped and removed"
283+
284+
# Restart the Milvus test container.
# Stop and start are run as sequential sub-makes rather than listed as
# prerequisites: prerequisites have no guaranteed order under `make -j`,
# and starting before the old container is removed fails on the name clash.
restart-milvus:
	@$(MAKE) stop-milvus
	@$(MAKE) start-milvus
285+
286+
# Report whether the Milvus test container is currently running.
# The runtime's `ps` table is captured once; if it mentions the container
# name the table is printed, otherwise a hint on how to start it is shown.
milvus-status:
	@echo "Checking Milvus container status..."
	@listing="$$($(CONTAINER_RUNTIME) ps --filter "name=milvus-semantic-cache" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}")"; \
	case "$$listing" in \
		*milvus-semantic-cache*) \
			echo "Milvus container is running:"; \
			printf '%s\n' "$$listing" ;; \
		*) \
			echo "Milvus container is not running"; \
			echo "Run 'make start-milvus' to start it" ;; \
	esac
295+
296+
# Stop the Milvus test container and delete its data.
# The data lives in /tmp/milvus-data (the host side of the bind mount used by
# start-milvus). The relative ./milvus-data path removed by earlier versions
# of this target is still cleaned for backward compatibility.
# Plain `rm` is tried first so no sudo prompt appears unless the files are
# owned by another user (e.g. written by a root container).
clean-milvus: stop-milvus
	@echo "Cleaning up Milvus data..."
	@rm -rf /tmp/milvus-data milvus-data 2>/dev/null || sudo rm -rf /tmp/milvus-data milvus-data
	@echo "Milvus data directory cleaned"
300+
301+
# Test semantic cache with Milvus backend
302+
# Run the cache package tests against a live Milvus container.
# The library directory is resolved from the repository root before changing
# into the Go module, then handed to `go test` via LD_LIBRARY_PATH.
test-milvus-cache: start-milvus rust
	@echo "Testing semantic cache with Milvus backend..."
	@lib_dir=$${PWD}/candle-binding/target/release; \
	export LD_LIBRARY_PATH=$$lib_dir; \
	cd src/semantic-router && \
		CGO_ENABLED=1 go test -tags=milvus -v ./pkg/cache/
	@echo "Consider running 'make stop-milvus' when done testing"
307+
308+
# Test semantic-router with Milvus enabled
309+
# Run the full semantic-router Go test suite with the milvus build tag,
# after building the router and starting the Milvus container.
# The library directory is resolved from the repository root before changing
# into the Go module, then handed to `go test` via LD_LIBRARY_PATH.
test-semantic-router-milvus: build-router start-milvus
	@echo "Testing semantic-router with Milvus cache backend..."
	@lib_dir=$${PWD}/candle-binding/target/release; \
	export LD_LIBRARY_PATH=$$lib_dir; \
	cd src/semantic-router && \
		CGO_ENABLED=1 go test -tags=milvus -v ./...
	@echo "Consider running 'make stop-milvus' when done testing"
314+
198315
# Documentation targets
199316
docs-install:
200317
@echo "Installing documentation dependencies..."

config/cache/milvus.yaml

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
# Milvus Vector Database Configuration for Semantic Cache
2+
#
3+
# This configuration file contains settings for using Milvus as the semantic cache backend.
4+
# To use this configuration:
5+
# 1. Set backend_type: "milvus" in your main config.yaml
6+
# 2. Set backend_config_path: "config/cache/milvus.yaml" in your main config.yaml
7+
# 3. Ensure Milvus server is running and accessible
8+
# 4. Build with Milvus support: go build -tags=milvus
9+
10+
# Milvus connection settings
11+
connection:
12+
# Milvus server host (change for production deployment)
13+
host: "localhost" # For production: use your Milvus cluster endpoint
14+
15+
# Milvus server port
16+
port: 19530 # Standard Milvus port
17+
18+
# Database name (optional, defaults to "default")
19+
database: "semantic_router_cache"
20+
21+
# Connection timeout in seconds
22+
timeout: 30
23+
24+
# Authentication (enable for production)
25+
auth:
26+
enabled: false # Set to true for production
27+
username: "" # Your Milvus username
28+
password: "" # Your Milvus password
29+
30+
# TLS/SSL configuration (recommended for production)
31+
tls:
32+
enabled: false # Set to true for secure connections
33+
cert_file: "" # Path to client certificate
34+
key_file: "" # Path to client private key
35+
ca_file: "" # Path to CA certificate
36+
37+
# Collection settings
38+
collection:
39+
# Name of the collection to store cache entries
40+
name: "semantic_cache"
41+
42+
# Description of the collection
43+
description: "Semantic cache for LLM request-response pairs"
44+
45+
# Vector field configuration
46+
vector_field:
47+
# Name of the vector field
48+
name: "embedding"
49+
50+
# Dimension of the embeddings (auto-detected from model at runtime)
51+
dimension: 384 # This value is ignored - dimension is auto-detected from the embedding model
52+
53+
# Metric type for similarity calculation
54+
metric_type: "IP" # Inner Product (cosine similarity for normalized vectors)
55+
56+
# Index configuration for the vector field
57+
index:
58+
# Index type (HNSW is recommended for most use cases)
59+
type: "HNSW"
60+
61+
# Index parameters
62+
params:
63+
M: 16 # Number of bi-directional links for each node
64+
efConstruction: 64 # Search scope during index construction
65+
66+
# Search configuration
67+
search:
68+
# Search parameters
69+
params:
70+
ef: 64 # Search scope during search (should be >= topk)
71+
72+
# Number of top results to retrieve for similarity comparison
73+
topk: 10
74+
75+
# Consistency level for search operations
76+
consistency_level: "Session" # Options: Strong, Session, Bounded, Eventually
77+
78+
# Performance and resource settings
79+
performance:
80+
# Connection pool settings
81+
connection_pool:
82+
# Maximum number of connections in the pool
83+
max_connections: 10
84+
85+
# Maximum idle connections
86+
max_idle_connections: 5
87+
88+
# Connection timeout for acquiring from pool
89+
acquire_timeout: 5
90+
91+
# Batch operation settings
92+
batch:
93+
# Maximum batch size for insert operations
94+
insert_batch_size: 1000
95+
96+
# Batch timeout in seconds
97+
timeout: 30
98+
99+
# Data management
100+
data_management:
101+
# Automatic data expiration (TTL) settings
102+
ttl:
103+
# Enable automatic TTL-based cleanup (requires TTL to be set in main config)
104+
enabled: true
105+
106+
# Field name to store timestamp for TTL calculation
107+
timestamp_field: "timestamp"
108+
109+
# Cleanup interval in seconds (how often to run cleanup)
110+
cleanup_interval: 3600 # 1 hour
111+
112+
# Compaction settings
113+
compaction:
114+
# Enable automatic compaction
115+
enabled: true
116+
117+
# Compaction interval in seconds
118+
interval: 86400 # 24 hours
119+
120+
# Logging and monitoring
121+
logging:
122+
# Log level for Milvus client operations (debug, info, warn, error)
123+
level: "info"
124+
125+
# Enable query/search logging for debugging
126+
enable_query_log: false
127+
128+
# Enable performance metrics collection
129+
enable_metrics: true
130+
131+
# Development and debugging settings
132+
development:
133+
# Drop collection on startup (WARNING: This will delete all cached data)
134+
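drop_collection_on_startup: true # Development default: wipes all cached entries on every startup so embedding-dimension changes can be tested; set to false to keep the cache across restarts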
135+
136+
# Create collection if it doesn't exist
137+
auto_create_collection: true
138+
139+
# Print detailed error messages
140+
verbose_errors: true
141+
142+
# Example configurations for different environments:
143+
#
144+
# Local Development (Docker):
145+
# connection:
146+
# host: "localhost"
147+
# port: 19530
148+
# auth:
149+
# enabled: false
150+
# development:
151+
# drop_collection_on_startup: true # Clean start for development
152+
#
153+
# Production (Zilliz Cloud):
154+
# connection:
155+
# host: "your-cluster-endpoint.zillizcloud.com"
156+
# port: 443
157+
# auth:
158+
# enabled: true
159+
# username: "your-username"
160+
# password: "your-password"
161+
# tls:
162+
# enabled: true
163+
# development:
164+
# drop_collection_on_startup: false
165+
# auto_create_collection: false # Pre-create collections in production
166+
#
167+
# Kubernetes Deployment:
168+
# connection:
169+
# host: "milvus-service.milvus-system.svc.cluster.local"
170+
# port: 19530
171+
# timeout: 60 # Longer timeout for cluster environments

src/semantic-router/go.mod

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ replace (
1111
require (
1212
github.com/envoyproxy/go-control-plane/envoy v1.32.4
1313
github.com/fsnotify/fsnotify v1.7.0
14+
github.com/milvus-io/milvus-sdk-go/v2 v2.4.2
1415
github.com/onsi/ginkgo/v2 v2.23.4
1516
github.com/onsi/gomega v1.38.0
1617
github.com/openai/openai-go v1.12.0
@@ -25,12 +26,23 @@ require (
2526
github.com/beorn7/perks v1.0.1 // indirect
2627
github.com/cespare/xxhash/v2 v2.3.0 // indirect
2728
github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 // indirect
29+
github.com/cockroachdb/errors v1.9.1 // indirect
30+
github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect
31+
github.com/cockroachdb/redact v1.1.3 // indirect
2832
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
33+
github.com/getsentry/sentry-go v0.12.0 // indirect
2934
github.com/go-logr/logr v1.4.2 // indirect
3035
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
36+
github.com/gogo/protobuf v1.3.2 // indirect
37+
github.com/golang/protobuf v1.5.4 // indirect
3138
github.com/google/go-cmp v0.7.0 // indirect
3239
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
40+
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
41+
github.com/kr/pretty v0.3.1 // indirect
42+
github.com/kr/text v0.2.0 // indirect
43+
github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a // indirect
3344
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
45+
github.com/pkg/errors v0.9.1 // indirect
3446
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
3547
github.com/prometheus/client_model v0.6.2 // indirect
3648
github.com/prometheus/common v0.65.0 // indirect
@@ -43,6 +55,7 @@ require (
4355
go.uber.org/automaxprocs v1.6.0 // indirect
4456
go.uber.org/multierr v1.10.0 // indirect
4557
golang.org/x/net v0.41.0 // indirect
58+
golang.org/x/sync v0.15.0 // indirect
4659
golang.org/x/sys v0.33.0 // indirect
4760
golang.org/x/text v0.26.0 // indirect
4861
golang.org/x/tools v0.33.0 // indirect

0 commit comments

Comments
 (0)