Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 122 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,66 @@
.PHONY: all build clean test docker-build podman-build docker-run podman-run
.PHONY: all build clean test docker-build podman-build docker-run podman-run start-milvus stop-milvus restart-milvus milvus-status clean-milvus test-milvus-cache test-semantic-router-milvus help

# Default target
all: build

# Help target
# Prints a categorized list of make targets, the environment variables the
# help text documents (CONTAINER_RUNTIME, CONFIG_FILE, VLLM_ENDPOINT), and a
# few usage examples. It only echoes text; nothing is built or started.
help:
@echo "Available targets:"
@echo " Build targets:"
@echo " all - Build everything (default)"
@echo " build - Build Rust library and Go router"
@echo " rust - Build only the Rust library"
@echo " build-router - Build only the Go router"
@echo " clean - Clean build artifacts"
@echo ""
@echo " Run targets:"
@echo " run-router - Run the router (CONFIG_FILE=config/config.yaml)"
@echo " run-envoy - Run Envoy proxy"
@echo ""
@echo " Test targets:"
@echo " test - Run all tests"
@echo " test-binding - Test candle-binding"
@echo " test-semantic-router - Test semantic router"
@echo " test-category-classifier - Test category classifier"
@echo " test-pii-classifier - Test PII classifier"
@echo " test-jailbreak-classifier - Test jailbreak classifier"
@echo ""
@echo " Milvus targets (CONTAINER_RUNTIME=docker|podman):"
@echo " start-milvus - Start Milvus container for testing"
@echo " stop-milvus - Stop and remove Milvus container"
@echo " restart-milvus - Restart Milvus container"
@echo " milvus-status - Check Milvus container status"
@echo " clean-milvus - Stop container and clean data"
@echo " test-milvus-cache - Test cache with Milvus backend"
@echo " test-semantic-router-milvus - Test router with Milvus cache"
@echo " Example: CONTAINER_RUNTIME=podman make start-milvus"
@echo ""
@echo " Demo targets:"
@echo " test-auto-prompt-reasoning - Test reasoning mode"
@echo " test-auto-prompt-no-reasoning - Test normal mode"
@echo " test-pii - Test PII detection"
@echo " test-prompt-guard - Test jailbreak detection"
@echo " test-tools - Test tool auto-selection"
@echo ""
@echo " Documentation targets:"
@echo " docs-dev - Start documentation dev server"
@echo " docs-build - Build documentation"
@echo " docs-serve - Serve built documentation"
@echo " docs-clean - Clean documentation artifacts"
@echo ""
@echo " Environment variables:"
@echo " CONTAINER_RUNTIME - Container runtime (docker|podman, default: docker)"
@echo " CONFIG_FILE - Config file path (default: config/config.yaml)"
@echo " VLLM_ENDPOINT - vLLM endpoint URL for testing"
@echo ""
@echo " Usage examples:"
@echo " make start-milvus # Use Docker (default)"
@echo " CONTAINER_RUNTIME=podman make start-milvus # Use Podman"
@echo " CONFIG_FILE=custom.yaml make run-router # Use custom config"

# Container runtime (docker or podman) read by the Milvus container targets.
# `?=` keeps any value already supplied via the environment or command line,
# e.g. CONTAINER_RUNTIME=podman make start-milvus
CONTAINER_RUNTIME ?= docker

# vLLM endpoint URL for testing; empty unless the caller provides one
VLLM_ENDPOINT ?=

Expand Down Expand Up @@ -30,7 +88,7 @@ rust:
build-router: rust
@echo "Building router..."
@mkdir -p bin
@cd src/semantic-router && go build -o ../../bin/router cmd/main.go
@cd src/semantic-router && go build --tags=milvus -o ../../bin/router cmd/main.go

# Config file path with default
CONFIG_FILE ?= config/config.yaml
Expand Down Expand Up @@ -104,9 +162,12 @@ test-jailbreak-classifier: rust
cd src/training/prompt_guard_fine_tuning && CGO_ENABLED=1 go run jailbreak_classifier_verifier.go

# Unit test semantic-router
# By default, Milvus tests are skipped. To enable them, set SKIP_MILVUS_TESTS=false
# Example: make test-semantic-router SKIP_MILVUS_TESTS=false
#
# LD_LIBRARY_PATH is pointed at candle-binding/target/release before changing
# into src/semantic-router. The directory is taken from the shell's PWD (as the
# other Go test targets in this file do) rather than from make's copy of the
# PWD environment variable, which can be stale when make is run with -C or
# from a parent process that did not update PWD.
test-semantic-router: build-router
	@echo "Testing semantic-router..."
	@export LD_LIBRARY_PATH=$${PWD}/candle-binding/target/release && \
	export SKIP_MILVUS_TESTS=$${SKIP_MILVUS_TESTS:-true} && \
	cd src/semantic-router && CGO_ENABLED=1 go test -v ./...

# Test the Rust library and the Go binding
Expand Down Expand Up @@ -195,6 +256,65 @@ download-models:
hf download LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model --local-dir models/pii_classifier_modernbert-base_presidio_token_model; \
fi

# Milvus container management
#
# start-milvus: run a standalone Milvus container named milvus-semantic-cache
# (embedded etcd, local storage), publishing ports 19530 and 9091 and mounting
# /tmp/milvus-data from the host as /var/lib/milvus.
#
# The target is safe to invoke repeatedly: a container that is already running
# is reused, and a stopped leftover with the same name is removed before a new
# one is created. Without this, a second invocation (including via targets that
# list start-milvus as a prerequisite) fails on a container-name conflict.
#
# Readiness is approximated with a fixed 15 second wait after creation.
start-milvus:
	@echo "Starting Milvus container for testing with $(CONTAINER_RUNTIME)..."
	@mkdir -p /tmp/milvus-data
	@if $(CONTAINER_RUNTIME) ps --format '{{.Names}}' | grep -qx milvus-semantic-cache; then \
		echo "Milvus container is already running"; \
	else \
		$(CONTAINER_RUNTIME) rm -f milvus-semantic-cache >/dev/null 2>&1 || true; \
		$(CONTAINER_RUNTIME) run -d \
			--name milvus-semantic-cache \
			--security-opt seccomp:unconfined \
			-e ETCD_USE_EMBED=true \
			-e ETCD_DATA_DIR=/var/lib/milvus/etcd \
			-e ETCD_CONFIG_PATH=/milvus/configs/advanced/etcd.yaml \
			-e COMMON_STORAGETYPE=local \
			-e CLUSTER_ENABLED=false \
			-p 19530:19530 \
			-p 9091:9091 \
			-v /tmp/milvus-data:/var/lib/milvus \
			milvusdb/milvus:v2.3.3 \
			milvus run standalone && \
		echo "Waiting for Milvus to be ready..." && \
		sleep 15; \
	fi
	@echo "Milvus should be available at localhost:19530"

# Stop and remove the milvus-semantic-cache container, then delete the host
# data directory /tmp/milvus-data. Every step is best-effort: a failure (for
# example, the container does not exist) is ignored so the target still succeeds.
stop-milvus:
	@echo "Stopping Milvus container..."
	@for action in stop rm; do \
		$(CONTAINER_RUNTIME) $$action milvus-semantic-cache || true; \
	done
	@sudo rm -rf /tmp/milvus-data || true
	@echo "Milvus container stopped and removed"

# Restart Milvus: stop and remove the container, then start a new one.
# The two steps run as sequential sub-makes rather than as prerequisites,
# because prerequisite order is not guaranteed under `make -j`; starting before
# the old container has been removed would collide on the container name.
# -f re-reads this same makefile even when make was invoked with -f.
restart-milvus:
	@$(MAKE) --no-print-directory -f $(firstword $(MAKEFILE_LIST)) stop-milvus
	@$(MAKE) --no-print-directory -f $(firstword $(MAKEFILE_LIST)) start-milvus

# Report whether the milvus-semantic-cache container is currently running.
# When it is, print its name, status and ports as a table; otherwise print a
# hint on how to start it.
milvus-status:
	@echo "Checking Milvus container status..."
	@status_table() { \
		$(CONTAINER_RUNTIME) ps --filter "name=milvus-semantic-cache" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"; \
	}; \
	if ! status_table | grep -q milvus-semantic-cache; then \
		echo "Milvus container is not running"; \
		echo "Run 'make start-milvus' to start it"; \
	else \
		echo "Milvus container is running:"; \
		status_table; \
	fi

# Stop the Milvus container and delete its persisted data.
# The data lives in /tmp/milvus-data, the host directory that start-milvus
# mounts into the container, so that is the path removed here (a relative
# "milvus-data" directory is never created by these targets).
# sudo is tried first, presumably because files written by the container may
# not be owned by the invoking user; plain rm is the fallback.
clean-milvus: stop-milvus
	@echo "Cleaning up Milvus data..."
	@sudo rm -rf /tmp/milvus-data || rm -rf /tmp/milvus-data
	@echo "Milvus data directory cleaned"

# Run the Go tests in pkg/cache with the milvus build tag enabled.
# Prerequisites start the Milvus container and build the Rust library; the
# container is left running afterwards, hence the closing reminder.
test-milvus-cache: start-milvus rust
	@echo "Testing semantic cache with Milvus backend..."
	@rust_lib_dir=$${PWD}/candle-binding/target/release; \
	cd src/semantic-router && \
	LD_LIBRARY_PATH=$$rust_lib_dir CGO_ENABLED=1 go test -tags=milvus -v ./pkg/cache/
	@echo "Consider running 'make stop-milvus' when done testing"

# Run the full semantic-router Go test suite with the milvus build tag enabled.
# Prerequisites build the router and start the Milvus container; the container
# is left running afterwards, hence the closing reminder.
test-semantic-router-milvus: build-router start-milvus
	@echo "Testing semantic-router with Milvus cache backend..."
	@rust_lib_dir=$${PWD}/candle-binding/target/release; \
	cd src/semantic-router && \
	LD_LIBRARY_PATH=$$rust_lib_dir CGO_ENABLED=1 go test -tags=milvus -v ./...
	@echo "Consider running 'make stop-milvus' when done testing"

# Documentation targets
docs-install:
@echo "Installing documentation dependencies..."
Expand Down
171 changes: 171 additions & 0 deletions config/cache/milvus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# Milvus Vector Database Configuration for Semantic Cache
#
# This configuration file contains settings for using Milvus as the semantic cache backend.
# To use this configuration:
# 1. Set backend_type: "milvus" in your main config.yaml
# 2. Set backend_config_path: "config/cache/milvus.yaml" in your main config.yaml
# 3. Ensure Milvus server is running and accessible
# 4. Build with Milvus support: go build -tags=milvus

# Milvus connection settings
connection:
# Milvus server host (change for production deployment)
host: "localhost" # For production: use your Milvus cluster endpoint

# Milvus server port
port: 19530 # Standard Milvus port

# Database name (optional, defaults to "default")
database: "semantic_router_cache"

# Connection timeout in seconds
timeout: 30

# Authentication (enable for production)
auth:
enabled: false # Set to true for production
username: "" # Your Milvus username
password: "" # Your Milvus password

# TLS/SSL configuration (recommended for production)
tls:
enabled: false # Set to true for secure connections
cert_file: "" # Path to client certificate
key_file: "" # Path to client private key
ca_file: "" # Path to CA certificate

# Collection settings
collection:
# Name of the collection to store cache entries
name: "semantic_cache"

# Description of the collection
description: "Semantic cache for LLM request-response pairs"

# Vector field configuration
vector_field:
# Name of the vector field
name: "embedding"

# Dimension of the embeddings (auto-detected from model at runtime)
dimension: 384 # This value is ignored - dimension is auto-detected from the embedding model

# Metric type for similarity calculation
metric_type: "IP" # Inner Product (cosine similarity for normalized vectors)

# Index configuration for the vector field
index:
# Index type (HNSW is recommended for most use cases)
type: "HNSW"

# Index parameters
params:
M: 16 # Number of bi-directional links for each node
efConstruction: 64 # Search scope during index construction

# Search configuration
search:
# Search parameters
params:
ef: 64 # Search scope during search (should be >= topk)

# Number of top results to retrieve for similarity comparison
topk: 10

# Consistency level for search operations
consistency_level: "Session" # Options: Strong, Session, Bounded, Eventually

# Performance and resource settings
performance:
# Connection pool settings
connection_pool:
# Maximum number of connections in the pool
max_connections: 10

# Maximum idle connections
max_idle_connections: 5

# Connection timeout for acquiring from pool
acquire_timeout: 5

# Batch operation settings
batch:
# Maximum batch size for insert operations
insert_batch_size: 1000

# Batch timeout in seconds
timeout: 30

# Data management
data_management:
# Automatic data expiration (TTL) settings
ttl:
# Enable automatic TTL-based cleanup (requires TTL to be set in main config)
enabled: true

# Field name to store timestamp for TTL calculation
timestamp_field: "timestamp"

# Cleanup interval in seconds (how often to run cleanup)
cleanup_interval: 3600 # 1 hour

# Compaction settings
compaction:
# Enable automatic compaction
enabled: true

# Compaction interval in seconds
interval: 86400 # 24 hours

# Logging and monitoring
logging:
# Log level for Milvus client operations (debug, info, warn, error)
level: "info"

# Enable query/search logging for debugging
enable_query_log: false

# Enable performance metrics collection
enable_metrics: true

# Development and debugging settings
# NOTE: the values in this section are development defaults. The production
# example at the end of this file sets drop_collection_on_startup and
# auto_create_collection to false.
development:
# Drop collection on startup (WARNING: This will delete all cached data)
# Leave this enabled only for development; set it to false wherever cached
# entries must survive a restart.
drop_collection_on_startup: true # Enable for development to test dynamic dimensions

# Create collection if it doesn't exist
auto_create_collection: true

# Print detailed error messages
verbose_errors: true

# Example configurations for different environments:
#
# Local Development (Docker):
# connection:
# host: "localhost"
# port: 19530
# auth:
# enabled: false
# development:
# drop_collection_on_startup: true # Clean start for development
#
# Production (Zilliz Cloud):
# connection:
# host: "your-cluster-endpoint.zillizcloud.com"
# port: 443
# auth:
# enabled: true
# username: "your-username"
# password: "your-password"
# tls:
# enabled: true
# development:
# drop_collection_on_startup: false
# auto_create_collection: false # Pre-create collections in production
#
# Kubernetes Deployment:
# connection:
# host: "milvus-service.milvus-system.svc.cluster.local"
# port: 19530
# timeout: 60 # Longer timeout for cluster environments
18 changes: 17 additions & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,25 @@ bert_model:
use_cpu: true
semantic_cache:
enabled: true
backend_type: "memory" # Options: "memory" or "milvus"
similarity_threshold: 0.8
max_entries: 1000
max_entries: 1000 # Only applies to memory backend
ttl_seconds: 3600

# For production environments, use Milvus for scalable caching:
# backend_type: "milvus"
# backend_config_path: "config/cache/milvus.yaml"

# Development/Testing: Use in-memory cache (current configuration)
# - Fast startup and no external dependencies
# - Limited to single instance scaling
# - Data lost on restart

# Production: Use Milvus vector database
# - Horizontally scalable and persistent
# - Supports distributed deployments
# - Requires Milvus cluster setup
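# - To enable: change backend_type above to "milvus" (do not add a second backend_type key), uncomment backend_config_path, and install Milvus dependencies (build with -tags=milvus)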
tools:
enabled: true # Set to true to enable automatic tool selection
top_k: 3 # Number of most relevant tools to select
Expand Down
Loading
Loading