Skip to content

Commit 3ce8a6e

Browse files
authored
feat: add milvus persistent storage support (#105)
* feat: add pluggable interface for semantic cache backends - Create CacheBackend interface with pluggable architecture - Refactor existing in-memory cache to implement new interface - Add cache factory pattern for backend selection - Support configurable similarity thresholds and TTL - Add comprehensive cache metrics and observability Addresses #94 Signed-off-by: Huamin Chen <[email protected]> * feat: add Milvus vector database backend for semantic cache - Implement MilvusCache backend with persistent storage - Add Milvus configuration file and connection management - Support vector similarity search with configurable indexing - Add TTL support and collection lifecycle management - Include Milvus dependencies and build configuration Addresses #95 Signed-off-by: Huamin Chen <[email protected]> * toggle milvus unit test Signed-off-by: Huamin Chen <[email protected]> * pre-commit fix Signed-off-by: Huamin Chen <[email protected]> * rebase Signed-off-by: Huamin Chen <[email protected]> --------- Signed-off-by: Huamin Chen <[email protected]>
1 parent ced6a8e commit 3ce8a6e

File tree

17 files changed

+2892
-854
lines changed

17 files changed

+2892
-854
lines changed

Makefile

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,66 @@
1-
.PHONY: all build clean test docker-build podman-build docker-run podman-run
1+
.PHONY: all build clean test docker-build podman-build docker-run podman-run start-milvus stop-milvus restart-milvus milvus-status clean-milvus test-milvus-cache test-semantic-router-milvus help
22

33
# Default target
44
all: build
55

6+
# Help target
7+
help:
8+
@echo "Available targets:"
9+
@echo " Build targets:"
10+
@echo " all - Build everything (default)"
11+
@echo " build - Build Rust library and Go router"
12+
@echo " rust - Build only the Rust library"
13+
@echo " build-router - Build only the Go router"
14+
@echo " clean - Clean build artifacts"
15+
@echo ""
16+
@echo " Run targets:"
17+
@echo " run-router - Run the router (CONFIG_FILE=config/config.yaml)"
18+
@echo " run-envoy - Run Envoy proxy"
19+
@echo ""
20+
@echo " Test targets:"
21+
@echo " test - Run all tests"
22+
@echo " test-binding - Test candle-binding"
23+
@echo " test-semantic-router - Test semantic router"
24+
@echo " test-category-classifier - Test category classifier"
25+
@echo " test-pii-classifier - Test PII classifier"
26+
@echo " test-jailbreak-classifier - Test jailbreak classifier"
27+
@echo ""
28+
@echo " Milvus targets (CONTAINER_RUNTIME=docker|podman):"
29+
@echo " start-milvus - Start Milvus container for testing"
30+
@echo " stop-milvus - Stop and remove Milvus container"
31+
@echo " restart-milvus - Restart Milvus container"
32+
@echo " milvus-status - Check Milvus container status"
33+
@echo " clean-milvus - Stop container and clean data"
34+
@echo " test-milvus-cache - Test cache with Milvus backend"
35+
@echo " test-semantic-router-milvus - Test router with Milvus cache"
36+
@echo " Example: CONTAINER_RUNTIME=podman make start-milvus"
37+
@echo ""
38+
@echo " Demo targets:"
39+
@echo " test-auto-prompt-reasoning - Test reasoning mode"
40+
@echo " test-auto-prompt-no-reasoning - Test normal mode"
41+
@echo " test-pii - Test PII detection"
42+
@echo " test-prompt-guard - Test jailbreak detection"
43+
@echo " test-tools - Test tool auto-selection"
44+
@echo ""
45+
@echo " Documentation targets:"
46+
@echo " docs-dev - Start documentation dev server"
47+
@echo " docs-build - Build documentation"
48+
@echo " docs-serve - Serve built documentation"
49+
@echo " docs-clean - Clean documentation artifacts"
50+
@echo ""
51+
@echo " Environment variables:"
52+
@echo " CONTAINER_RUNTIME - Container runtime (docker|podman, default: docker)"
53+
@echo " CONFIG_FILE - Config file path (default: config/config.yaml)"
54+
@echo " VLLM_ENDPOINT - vLLM endpoint URL for testing"
55+
@echo ""
56+
@echo " Usage examples:"
57+
@echo " make start-milvus # Use Docker (default)"
58+
@echo " CONTAINER_RUNTIME=podman make start-milvus # Use Podman"
59+
@echo " CONFIG_FILE=custom.yaml make run-router # Use custom config"
60+
61+
# Container runtime (docker or podman)
62+
CONTAINER_RUNTIME ?= docker
63+
664
# vLLM env var
765
VLLM_ENDPOINT ?=
866

@@ -30,7 +88,7 @@ rust:
3088
build-router: rust
3189
@echo "Building router..."
3290
@mkdir -p bin
33-
@cd src/semantic-router && go build -o ../../bin/router cmd/main.go
91+
@cd src/semantic-router && go build --tags=milvus -o ../../bin/router cmd/main.go
3492

3593
# Config file path with default
3694
CONFIG_FILE ?= config/config.yaml
@@ -104,9 +162,12 @@ test-jailbreak-classifier: rust
104162
cd src/training/prompt_guard_fine_tuning && CGO_ENABLED=1 go run jailbreak_classifier_verifier.go
105163

106164
# Unit test semantic-router
165+
# By default, Milvus tests are skipped. To enable them, set SKIP_MILVUS_TESTS=false
166+
# Example: make test-semantic-router SKIP_MILVUS_TESTS=false
107167
test-semantic-router: build-router
108168
@echo "Testing semantic-router..."
109169
# Let the shell expand PWD (escaped as $$) so the library path follows the recipe's working directory, as in the Milvus test targets
@export LD_LIBRARY_PATH=$${PWD}/candle-binding/target/release && \
170+
export SKIP_MILVUS_TESTS=$${SKIP_MILVUS_TESTS:-true} && \
110171
cd src/semantic-router && CGO_ENABLED=1 go test -v ./...
111172

112173
# Test the Rust library and the Go binding
@@ -195,6 +256,65 @@ download-models:
195256
hf download LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model --local-dir models/pii_classifier_modernbert-base_presidio_token_model; \
196257
fi
197258

259+
# Milvus container management
260+
start-milvus:
261+
@echo "Starting Milvus container for testing with $(CONTAINER_RUNTIME)..."
262+
@mkdir -p /tmp/milvus-data
263+
@$(CONTAINER_RUNTIME) run -d \
264+
--name milvus-semantic-cache \
265+
--security-opt seccomp:unconfined \
266+
-e ETCD_USE_EMBED=true \
267+
-e ETCD_DATA_DIR=/var/lib/milvus/etcd \
268+
-e ETCD_CONFIG_PATH=/milvus/configs/advanced/etcd.yaml \
269+
-e COMMON_STORAGETYPE=local \
270+
-e CLUSTER_ENABLED=false \
271+
-p 19530:19530 \
272+
-p 9091:9091 \
273+
-v /tmp/milvus-data:/var/lib/milvus \
274+
milvusdb/milvus:v2.3.3 \
275+
milvus run standalone
276+
@echo "Waiting for Milvus to be ready..."
277+
# Poll the Milvus health endpoint on the published 9091 port (up to ~60s) instead of sleeping a fixed time; best-effort, never fails the target
@for attempt in $$(seq 1 30); do \
curl -sf http://localhost:9091/healthz >/dev/null 2>&1 && break; \
sleep 2; \
done
278+
@echo "Milvus should be available at localhost:19530"
279+
280+
stop-milvus:
281+
@echo "Stopping Milvus container..."
282+
@$(CONTAINER_RUNTIME) stop milvus-semantic-cache || true
283+
@$(CONTAINER_RUNTIME) rm milvus-semantic-cache || true
284+
@sudo rm -rf /tmp/milvus-data || true
285+
@echo "Milvus container stopped and removed"
286+
287+
restart-milvus: stop-milvus start-milvus
288+
289+
milvus-status:
290+
@echo "Checking Milvus container status..."
291+
@if $(CONTAINER_RUNTIME) ps --filter "name=milvus-semantic-cache" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" | grep -q milvus-semantic-cache; then \
292+
echo "Milvus container is running:"; \
293+
$(CONTAINER_RUNTIME) ps --filter "name=milvus-semantic-cache" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"; \
294+
else \
295+
echo "Milvus container is not running"; \
296+
echo "Run 'make start-milvus' to start it"; \
297+
fi
298+
299+
clean-milvus: stop-milvus
300+
@echo "Cleaning up Milvus data..."
301+
# Remove the data directory that start-milvus creates and mounts (/tmp/milvus-data); fall back to a non-sudo rm when sudo is unavailable
@sudo rm -rf /tmp/milvus-data || rm -rf /tmp/milvus-data
302+
@echo "Milvus data directory cleaned"
303+
304+
# Test semantic cache with Milvus backend
305+
test-milvus-cache: start-milvus rust
306+
@echo "Testing semantic cache with Milvus backend..."
307+
@export LD_LIBRARY_PATH=$${PWD}/candle-binding/target/release && \
308+
cd src/semantic-router && CGO_ENABLED=1 go test -tags=milvus -v ./pkg/cache/
309+
@echo "Consider running 'make stop-milvus' when done testing"
310+
311+
# Test semantic-router with Milvus enabled
312+
test-semantic-router-milvus: build-router start-milvus
313+
@echo "Testing semantic-router with Milvus cache backend..."
314+
@export LD_LIBRARY_PATH=$${PWD}/candle-binding/target/release && \
315+
cd src/semantic-router && CGO_ENABLED=1 go test -tags=milvus -v ./...
316+
@echo "Consider running 'make stop-milvus' when done testing"
317+
198318
# Documentation targets
199319
docs-install:
200320
@echo "Installing documentation dependencies..."

config/cache/milvus.yaml

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
# Milvus Vector Database Configuration for Semantic Cache
2+
#
3+
# This configuration file contains settings for using Milvus as the semantic cache backend.
4+
# To use this configuration:
5+
# 1. Set backend_type: "milvus" in your main config.yaml
6+
# 2. Set backend_config_path: "config/cache/milvus.yaml" in your main config.yaml
7+
# 3. Ensure Milvus server is running and accessible
8+
# 4. Build with Milvus support: go build -tags=milvus
9+
10+
# Milvus connection settings
11+
connection:
12+
# Milvus server host (change for production deployment)
13+
host: "localhost" # For production: use your Milvus cluster endpoint
14+
15+
# Milvus server port
16+
port: 19530 # Standard Milvus port
17+
18+
# Database name (optional, defaults to "default")
19+
database: "semantic_router_cache"
20+
21+
# Connection timeout in seconds
22+
timeout: 30
23+
24+
# Authentication (enable for production)
25+
auth:
26+
enabled: false # Set to true for production
27+
username: "" # Your Milvus username
28+
password: "" # Your Milvus password
29+
30+
# TLS/SSL configuration (recommended for production)
31+
tls:
32+
enabled: false # Set to true for secure connections
33+
cert_file: "" # Path to client certificate
34+
key_file: "" # Path to client private key
35+
ca_file: "" # Path to CA certificate
36+
37+
# Collection settings
38+
collection:
39+
# Name of the collection to store cache entries
40+
name: "semantic_cache"
41+
42+
# Description of the collection
43+
description: "Semantic cache for LLM request-response pairs"
44+
45+
# Vector field configuration
46+
vector_field:
47+
# Name of the vector field
48+
name: "embedding"
49+
50+
# Dimension of the embeddings (auto-detected from model at runtime)
51+
dimension: 384 # This value is ignored - dimension is auto-detected from the embedding model
52+
53+
# Metric type for similarity calculation
54+
metric_type: "IP" # Inner Product (cosine similarity for normalized vectors)
55+
56+
# Index configuration for the vector field
57+
index:
58+
# Index type (HNSW is recommended for most use cases)
59+
type: "HNSW"
60+
61+
# Index parameters
62+
params:
63+
M: 16 # Number of bi-directional links for each node
64+
efConstruction: 64 # Search scope during index construction
65+
66+
# Search configuration
67+
search:
68+
# Search parameters
69+
params:
70+
ef: 64 # Search scope during search (should be >= topk)
71+
72+
# Number of top results to retrieve for similarity comparison
73+
topk: 10
74+
75+
# Consistency level for search operations
76+
consistency_level: "Session" # Options: Strong, Session, Bounded, Eventually
77+
78+
# Performance and resource settings
79+
performance:
80+
# Connection pool settings
81+
connection_pool:
82+
# Maximum number of connections in the pool
83+
max_connections: 10
84+
85+
# Maximum idle connections
86+
max_idle_connections: 5
87+
88+
# Connection timeout for acquiring from pool
89+
acquire_timeout: 5
90+
91+
# Batch operation settings
92+
batch:
93+
# Maximum batch size for insert operations
94+
insert_batch_size: 1000
95+
96+
# Batch timeout in seconds
97+
timeout: 30
98+
99+
# Data management
100+
data_management:
101+
# Automatic data expiration (TTL) settings
102+
ttl:
103+
# Enable automatic TTL-based cleanup (requires ttl_seconds to be set under semantic_cache in the main config.yaml)
104+
enabled: true
105+
106+
# Field name to store timestamp for TTL calculation
107+
timestamp_field: "timestamp"
108+
109+
# Cleanup interval in seconds (how often to run cleanup)
110+
cleanup_interval: 3600 # 1 hour
111+
112+
# Compaction settings
113+
compaction:
114+
# Enable automatic compaction
115+
enabled: true
116+
117+
# Compaction interval in seconds
118+
interval: 86400 # 24 hours
119+
120+
# Logging and monitoring
121+
logging:
122+
# Log level for Milvus client operations (debug, info, warn, error)
123+
level: "info"
124+
125+
# Enable query/search logging for debugging
126+
enable_query_log: false
127+
128+
# Enable performance metrics collection
129+
enable_metrics: true
130+
131+
# Development and debugging settings
132+
development:
133+
# Drop collection on startup (WARNING: this deletes all cached data on every start; set to false to keep the cache persistent across restarts)
134+
drop_collection_on_startup: true # Enable for development to test dynamic dimensions
135+
136+
# Create collection if it doesn't exist
137+
auto_create_collection: true
138+
139+
# Print detailed error messages
140+
verbose_errors: true
141+
142+
# Example configurations for different environments:
143+
#
144+
# Local Development (Docker):
145+
# connection:
146+
# host: "localhost"
147+
# port: 19530
148+
# auth:
149+
# enabled: false
150+
# development:
151+
# drop_collection_on_startup: true # Clean start for development
152+
#
153+
# Production (Zilliz Cloud):
154+
# connection:
155+
# host: "your-cluster-endpoint.zillizcloud.com"
156+
# port: 443
157+
# auth:
158+
# enabled: true
159+
# username: "your-username"
160+
# password: "your-password"
161+
# tls:
162+
# enabled: true
163+
# development:
164+
# drop_collection_on_startup: false
165+
# auto_create_collection: false # Pre-create collections in production
166+
#
167+
# Kubernetes Deployment:
168+
# connection:
169+
# host: "milvus-service.milvus-system.svc.cluster.local"
170+
# port: 19530
171+
# timeout: 60 # Longer timeout for cluster environments

config/config.yaml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,25 @@ bert_model:
44
use_cpu: true
55
semantic_cache:
66
enabled: true
7+
backend_type: "memory" # Options: "memory" or "milvus"
78
similarity_threshold: 0.8
8-
max_entries: 1000
9+
max_entries: 1000 # Only applies to memory backend
910
ttl_seconds: 3600
11+
12+
# For production environments, use Milvus for scalable caching:
13+
# backend_type: "milvus"
14+
# backend_config_path: "config/cache/milvus.yaml"
15+
16+
# Development/Testing: Use in-memory cache (current configuration)
17+
# - Fast startup and no external dependencies
18+
# - Limited to single instance scaling
19+
# - Data lost on restart
20+
21+
# Production: Use Milvus vector database
22+
# - Horizontally scalable and persistent
23+
# - Supports distributed deployments
24+
# - Requires Milvus cluster setup
25+
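# - To enable: replace backend_type: "memory" with the commented backend_type/backend_config_path lines above (do not leave both backend_type keys), and install Milvus dependencies (build with -tags=milvus)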
1026
tools:
1127
enabled: true # Set to true to enable automatic tool selection
1228
top_k: 3 # Number of most relevant tools to select

0 commit comments

Comments
 (0)