3 changes: 2 additions & 1 deletion .github/workflows/test-and-build.yml
@@ -110,7 +110,7 @@ jobs:
        docker ps --filter "name=milvus-semantic-cache"

    - name: Run semantic router tests
-     run: make test
+     run: make test --debug=v
      env:
        CI: true
        CI_MINIMAL_MODELS: ${{ github.event_name == 'pull_request' }}
@@ -126,6 +126,7 @@
        docker stop milvus-semantic-cache || true
        docker rm milvus-semantic-cache || true
        echo "Milvus container cleaned up"
+       SKIP_TOOL_CALL_TESTS: true

- name: Upload test artifacts on failure
if: failure()
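The new SKIP_TOOL_CALL_TESTS variable presumably rides along with the other environment variables of the test step rather than inside the cleanup script; a minimal sketch under that assumption (the exact placement is not recoverable from this diff):

    - name: Run semantic router tests
      run: make test --debug=v
      env:
        CI: true
        CI_MINIMAL_MODELS: ${{ github.event_name == 'pull_request' }}
        SKIP_TOOL_CALL_TESTS: true  # assumed placement; skips tool-call tests in CI
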
108 changes: 108 additions & 0 deletions config/config.development.yaml
@@ -0,0 +1,108 @@
# Development Configuration Example with Stdout Tracing
# This configuration enables distributed tracing with stdout exporter
# for local development and debugging.

bert_model:
  model_id: models/all-MiniLM-L12-v2
  threshold: 0.6
  use_cpu: true

semantic_cache:
  enabled: true
  backend_type: "memory"
  similarity_threshold: 0.8
  max_entries: 100
  ttl_seconds: 600
  eviction_policy: "fifo"
  use_hnsw: true # Enable HNSW for faster search
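  # HNSW tuning (standard meanings): hnsw_m is the maximum number of neighbors
  # per graph node; hnsw_ef_construction is the candidate-list size used while
  # building the index. Higher values trade build time and memory for recall.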
  hnsw_m: 16
  hnsw_ef_construction: 200

tools:
  enabled: false
  top_k: 3
  similarity_threshold: 0.2
  tools_db_path: "config/tools_db.json"
  fallback_to_empty: true

prompt_guard:
  enabled: false

vllm_endpoints:
  - name: "local-endpoint"
    address: "127.0.0.1"
    port: 8000
    weight: 1

model_config:
  "test-model":
    pii_policy:
      allow_by_default: true

classifier:
  category_model:
    model_id: "models/category_classifier_modernbert-base_model"
    use_modernbert: true
    threshold: 0.6
    use_cpu: true
    category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

categories:
  - name: test
    system_prompt: "You are a test assistant."
    # Example: Category-level cache settings
    # semantic_cache_enabled: true
    # semantic_cache_similarity_threshold: 0.85
    model_scores:
      - model: test-model
        score: 1.0
        use_reasoning: false

default_model: test-model

# Enable OpenAI Responses API adapter (experimental)
enable_responses_adapter: true
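# (Assumption, not confirmed by this PR: the adapter presumably accepts
# OpenAI Responses API requests at /v1/responses and maps them onto the
# existing chat-completions path.)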

# Auto model name for automatic model selection (optional)
# Uncomment and set to customize the model name for automatic routing
# auto_model_name: "MoM"

api:
  batch_classification:
    max_batch_size: 10
    metrics:
      enabled: true

# Observability Configuration - Development with Stdout
observability:
  tracing:
    # Enable tracing for development/debugging
    enabled: true

    # OpenTelemetry provider
    provider: "opentelemetry"

    exporter:
      # Stdout exporter prints traces to console (great for debugging)
      type: "stdout"

      # No endpoint needed for stdout
      # endpoint: ""
      # insecure: true

    sampling:
      # Always sample in development to see all traces
      type: "always_on"

      # Rate not used for always_on
      # rate: 1.0

    resource:
      # Service name for trace identification
      service_name: "vllm-semantic-router-dev"

      # Version for development
      service_version: "dev"

      # Environment identifier
      deployment_environment: "development"
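
For non-development environments, a minimal sketch of the same exporter block pointed at a collector instead of stdout — assuming an "otlp" exporter type alongside "stdout" and the endpoint/insecure fields hinted at in the comments above (both assumptions, not confirmed by this PR):

observability:
  tracing:
    exporter:
      type: "otlp"                # assumed alternative to "stdout"
      endpoint: "localhost:4317"  # hypothetical collector address (default OTLP gRPC port)
      insecure: true              # plaintext transport, for local collectors only
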
5 changes: 4 additions & 1 deletion config/config.yaml
@@ -24,7 +24,7 @@ semantic_cache:
  # Options: "bert" (fast, 384-dim), "qwen3" (high quality, 1024-dim, 32K context), "gemma" (balanced, 768-dim, 8K context)
  # Default: "bert" (fastest, lowest memory)
  embedding_model: "bert"

tools:
  enabled: true
  top_k: 3
@@ -480,6 +480,9 @@ reasoning_families:
# Global default reasoning effort level
default_reasoning_effort: high

+# Enable OpenAI Responses API adapter (experimental)
+enable_responses_adapter: true
+
# API Configuration
api:
  batch_classification:
Empty file removed dashboard/backend/.gitkeep