From 1503c375706843d3d17fb21a032554998c71fc63 Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Wed, 15 Oct 2025 11:06:42 +0800 Subject: [PATCH 1/7] add tracing to docker compose Signed-off-by: JaredforReal --- config/config.tracing.yaml | 104 ++++++++++++++++++ config/config.yaml | 36 +++--- deploy/docker-compose/README.md | 16 ++- .../addons/grafana-datasource-jaeger.yaml | 15 +++ deploy/docker-compose/docker-compose.yml | 16 +++ tools/tracing/README.md | 12 +- .../observability/tracing-quickstart.md | 55 +++++++++ 7 files changed, 230 insertions(+), 24 deletions(-) create mode 100644 config/config.tracing.yaml create mode 100644 deploy/docker-compose/addons/grafana-datasource-jaeger.yaml create mode 100644 website/docs/tutorials/observability/tracing-quickstart.md diff --git a/config/config.tracing.yaml b/config/config.tracing.yaml new file mode 100644 index 00000000..8d9ea682 --- /dev/null +++ b/config/config.tracing.yaml @@ -0,0 +1,104 @@ +# Local Tracing Configuration (Jaeger + Always-On Sampling) +# This config is used by tools/tracing/docker-compose.tracing.yaml via CONFIG_FILE. 
+ +bert_model: + model_id: sentence-transformers/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: true + +semantic_cache: + enabled: true + backend_type: "memory" + similarity_threshold: 0.8 + max_entries: 1000 + ttl_seconds: 3600 + eviction_policy: "fifo" + +tools: + enabled: true + top_k: 3 + similarity_threshold: 0.2 + tools_db_path: "config/tools_db.json" + fallback_to_empty: true + +prompt_guard: + enabled: true + use_modernbert: true + model_id: "models/jailbreak_classifier_modernbert-base_model" + threshold: 0.7 + use_cpu: true + jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json" + +vllm_endpoints: + - name: "endpoint1" + address: "127.0.0.1" + port: 8000 + weight: 1 + +model_config: + "openai/gpt-oss-20b": + reasoning_family: "gpt-oss" + preferred_endpoints: ["endpoint1"] + pii_policy: + allow_by_default: true + +classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + pii_model: + model_id: "models/pii_classifier_modernbert-base_presidio_token_model" + use_modernbert: true + threshold: 0.7 + use_cpu: true + pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" + +categories: + - name: math + system_prompt: "You are a mathematics expert. Provide step-by-step solutions." + model_scores: + - model: openai/gpt-oss-20b + score: 1.0 + use_reasoning: true + - name: other + system_prompt: "You are a helpful assistant." 
+ model_scores: + - model: openai/gpt-oss-20b + score: 0.7 + use_reasoning: false + +default_model: openai/gpt-oss-20b + +reasoning_families: + gpt-oss: + type: "reasoning_effort" + parameter: "reasoning_effort" + +default_reasoning_effort: high + +api: + batch_classification: + max_batch_size: 100 + concurrency_threshold: 5 + max_concurrency: 8 + metrics: + enabled: true + +observability: + tracing: + enabled: true + provider: "opentelemetry" + exporter: + type: "otlp" + endpoint: "jaeger:4317" # Jaeger gRPC OTLP endpoint inside compose network + insecure: true + sampling: + type: "always_on" # Always sample in local/dev for easy debugging + rate: 1.0 + resource: + service_name: "vllm-semantic-router" + service_version: "dev" + deployment_environment: "local" diff --git a/config/config.yaml b/config/config.yaml index 579b9e35..23b6f067 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -5,11 +5,11 @@ bert_model: semantic_cache: enabled: true - backend_type: "memory" # Options: "memory" or "milvus" + backend_type: "memory" # Options: "memory" or "milvus" similarity_threshold: 0.8 - max_entries: 1000 # Only applies to memory backend + max_entries: 1000 # Only applies to memory backend ttl_seconds: 3600 - eviction_policy: "fifo" + eviction_policy: "fifo" tools: enabled: true @@ -32,13 +32,13 @@ prompt_guard: # NOT supported: domain names (example.com), protocol prefixes (http://), paths (/api), ports in address (use 'port' field) vllm_endpoints: - name: "endpoint1" - address: "127.0.0.1" # IPv4 address - REQUIRED format + address: "127.0.0.1" # IPv4 address - REQUIRED format port: 8000 weight: 1 model_config: "openai/gpt-oss-20b": - reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax + reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax preferred_endpoints: ["endpoint1"] pii_policy: allow_by_default: true @@ -65,7 +65,7 @@ categories: model_scores: - model: openai/gpt-oss-20b score: 0.7 - use_reasoning: false # Business 
performs better without reasoning + use_reasoning: false # Business performs better without reasoning - name: law system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters." model_scores: @@ -89,7 +89,7 @@ categories: model_scores: - model: openai/gpt-oss-20b score: 0.6 - use_reasoning: true # Enable reasoning for complex chemistry + use_reasoning: true # Enable reasoning for complex chemistry - name: history system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis." model_scores: @@ -119,13 +119,13 @@ categories: model_scores: - model: openai/gpt-oss-20b score: 1.0 - use_reasoning: true # Enable reasoning for complex math + use_reasoning: true # Enable reasoning for complex math - name: physics system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate." model_scores: - model: openai/gpt-oss-20b score: 0.7 - use_reasoning: true # Enable reasoning for physics + use_reasoning: true # Enable reasoning for physics - name: computer science system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful." 
model_scores: @@ -178,23 +178,23 @@ api: detailed_goroutine_tracking: true high_resolution_timing: false sample_rate: 1.0 - duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] + duration_buckets: + [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] # Observability Configuration observability: tracing: - enabled: false # Enable distributed tracing (default: false) - provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry + enabled: true # Enable distributed tracing for docker-compose stack + provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry exporter: - type: "stdout" # Exporter: otlp, jaeger, zipkin, stdout - endpoint: "localhost:4317" # OTLP endpoint (when type: otlp) - insecure: true # Use insecure connection (no TLS) + type: "otlp" # Export spans to Jaeger (via OTLP gRPC) + endpoint: "jaeger:4317" # Jaeger collector inside compose network + insecure: true # Use insecure connection (no TLS) sampling: - type: "always_on" # Sampling: always_on, always_off, probabilistic - rate: 1.0 # Sampling rate for probabilistic (0.0-1.0) + type: "always_on" # Sampling: always_on, always_off, probabilistic + rate: 1.0 # Sampling rate for probabilistic (0.0-1.0) resource: service_name: "vllm-semantic-router" service_version: "v0.1.0" deployment_environment: "development" - diff --git a/deploy/docker-compose/README.md b/deploy/docker-compose/README.md index 3a200167..692c857f 100644 --- a/deploy/docker-compose/README.md +++ b/deploy/docker-compose/README.md @@ -1,8 +1,9 @@ # Main Runtime Compose Stack -This directory contains the primary `docker-compose.yml` used to run the semantic-router stack (router + envoy + optional mock-vllm + observability). 
+This directory contains the primary `docker-compose.yml` used to run the semantic-router stack (router + envoy + optional mock-vllm + observability: Prometheus/Grafana + Jaeger tracing). ## Path Layout + Because this file lives under `deploy/docker-compose/`, all relative paths to repository resources go two levels up (../../) back to repo root. Example mappings: @@ -30,6 +31,7 @@ docker compose -f deploy/docker-compose/docker-compose.yml down ``` ## Overrides + You can place a `docker-compose.override.yml` at repo root and combine: ```bash @@ -39,4 +41,14 @@ docker compose -f deploy/docker-compose/docker-compose.yml -f docker-compose.ove ## Related Stacks - Local observability only: `tools/observability/docker-compose.obs.yml` -- Tracing stack: `tools/tracing/docker-compose.tracing.yaml` +- Tracing stack (standalone, dev): `tools/tracing/docker-compose.tracing.yaml` + +## Tracing & Grafana + +- Jaeger UI: http://localhost:16686 +- Grafana: http://localhost:3000 (admin/admin) + - Prometheus datasource (default) for metrics + - Jaeger datasource for exploring traces (search service `vllm-semantic-router`) + +By default, the router container uses `config/config.yaml`, which has tracing enabled (exporter to Jaeger). +Override `CONFIG_FILE` to point at a config with tracing disabled if you don’t want tracing. 
diff --git a/deploy/docker-compose/addons/grafana-datasource-jaeger.yaml b/deploy/docker-compose/addons/grafana-datasource-jaeger.yaml new file mode 100644 index 00000000..85f5165e --- /dev/null +++ b/deploy/docker-compose/addons/grafana-datasource-jaeger.yaml @@ -0,0 +1,15 @@ +# Grafana datasource configuration for Jaeger/Tempo tracing +# Provisioned automatically when Grafana starts + +apiVersion: 1 + +datasources: + - name: Jaeger + type: jaeger + access: proxy + url: http://jaeger:16686 + isDefault: false + editable: true + jsonData: + nodeGraph: + enabled: true diff --git a/deploy/docker-compose/docker-compose.yml b/deploy/docker-compose/docker-compose.yml index 98fdb70d..29adf564 100644 --- a/deploy/docker-compose/docker-compose.yml +++ b/deploy/docker-compose/docker-compose.yml @@ -1,4 +1,15 @@ services: + # Jaeger for distributed tracing (OTLP gRPC + UI) + jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger + environment: + - COLLECTOR_OTLP_ENABLED=true + ports: + - "4317:4317" # OTLP gRPC + - "16686:16686" # Web UI + networks: + - semantic-network # Semantic Router External Processor Service semantic-router: @@ -11,7 +22,11 @@ services: - ../../models:/app/models:ro environment: - LD_LIBRARY_PATH=/app/lib + # Use main config by default; override via CONFIG_FILE if needed - CONFIG_FILE=${CONFIG_FILE:-/app/config/config.yaml} + # Optional informational envs (router reads YAML for tracing config) + - OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4317 + - OTEL_SERVICE_NAME=vllm-semantic-router networks: - semantic-network healthcheck: @@ -89,6 +104,7 @@ services: volumes: - ./addons/grafana.ini:/etc/grafana/grafana.ini:ro - ./addons/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro + - ./addons/grafana-datasource-jaeger.yaml:/etc/grafana/provisioning/datasources/datasource_jaeger.yaml:ro - ./addons/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro - 
./addons/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro - grafana-data:/var/lib/grafana diff --git a/tools/tracing/README.md b/tools/tracing/README.md index 366853b4..b4594b26 100644 --- a/tools/tracing/README.md +++ b/tools/tracing/README.md @@ -3,6 +3,7 @@ This directory provides a local Jaeger + tracing-enabled semantic-router stack for development, debugging, and demonstration. ## Why here? + `tools/tracing` groups this with other local-only utilities (see `tools/observability` for metrics stack). Production deployments should rely on manifests in `deploy/kubernetes` / `openshift` instead of this all-in-one compose. ## Quick Start @@ -31,7 +32,7 @@ curl -X POST http://localhost:8081/v1/chat/completions \ ## View Traces 1. Open Jaeger UI -2. Select service: `vllm-semantic-router` (or `semantic-router` depending on OTEL resource config) +2. Select service: `vllm-semantic-router` (set via resource.service_name in config/config.tracing.yaml) 3. Click "Find Traces" ## Stopping @@ -48,12 +49,15 @@ docker compose -f tools/tracing/docker-compose.tracing.yaml down -v ## Environment Variables -| Variable | Purpose | -|----------|---------| +| Variable | Purpose | +| --------------------------- | ------------------------------- | | OTEL_EXPORTER_OTLP_ENDPOINT | Where spans are exported (gRPC) | -| OTEL_SERVICE_NAME | Logical service name in traces | +| OTEL_SERVICE_NAME | Logical service name in traces | + +Note: the router reads tracing settings from the YAML config (`observability.tracing.*`). The `OTEL_*` env vars here are only informational and do not override the YAML. To change exporter endpoint or service name, edit `config/config.tracing.yaml`. 
## Relationship with Metrics Stack + If you also want Prometheus/Grafana metrics: ```bash diff --git a/website/docs/tutorials/observability/tracing-quickstart.md b/website/docs/tutorials/observability/tracing-quickstart.md new file mode 100644 index 00000000..1ab5cccd --- /dev/null +++ b/website/docs/tutorials/observability/tracing-quickstart.md @@ -0,0 +1,55 @@ +# Tracing Quickstart + +This guide helps you spin up a local tracing stack and see your first traces in a minute. + +## Prerequisites + +- Docker and Docker Compose + +## Start the local tracing stack + +The repo includes a compose file that starts Jaeger and a tracing-enabled Semantic Router instance. + +- The router uses `config/config.tracing.yaml` which has tracing enabled and the exporter pointed at Jaeger. + +Run: + +```bash +# from repo root +docker compose -f tools/tracing/docker-compose.tracing.yaml up -d +``` + +## Send a test request + +```bash +curl -X POST http://localhost:8081/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "auto", + "messages": [{"role": "user", "content": "What is 2+2?"}] + }' +``` + +## View traces + +1. Open Jaeger UI: http://localhost:16686 +2. Choose service: `vllm-semantic-router` +3. Find traces → click one to inspect spans + +You should see spans like: + +- `semantic_router.request.received` +- `semantic_router.classification` +- `semantic_router.cache.lookup` +- `semantic_router.routing.decision` +- `semantic_router.backend.selection` + +## Customize + +- Change service name or sampling in `config/config.tracing.yaml` under `observability.tracing`. +- To export to another backend (e.g., Tempo), set `exporter.endpoint` and `insecure` accordingly. + +## Troubleshooting + +- No traces? Confirm tracing is enabled in the YAML and Jaeger is reachable at `jaeger:4317` inside the compose network. +- Empty service list in Jaeger? Make one request to generate spans, then refresh. 
From 3c310c5afb7ee2cd987d52df82cc1fe8ffd32d89 Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Wed, 15 Oct 2025 13:37:52 +0800 Subject: [PATCH 2/7] make: ensure docker-compose-down stops profiled services; add down-core and down-llm-katan targets Signed-off-by: JaredforReal --- tools/make/docker.mk | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/make/docker.mk b/tools/make/docker.mk index 975d91f4..ba2e3503 100644 --- a/tools/make/docker.mk +++ b/tools/make/docker.mk @@ -130,9 +130,25 @@ docker-compose-rebuild-llm-katan: docker-compose-up-llm-katan docker-compose-down: @$(LOG_TARGET) - @echo "Stopping docker-compose services..." + @echo "Stopping all docker-compose services (including profiled) and removing orphans..." + # Stop profiled services explicitly (if they were started) + @docker compose --profile llm-katan down --remove-orphans || true + @docker compose --profile testing down --remove-orphans || true + # Finally stop the base stack + @docker compose down --remove-orphans + +# Stop only core services (leave profiled services like llm-katan/testing running) +docker-compose-down-core: + @$(LOG_TARGET) + @echo "Stopping core services only (no profiles; keeps llm-katan/testing if running)..." @docker compose down +# Stop only llm-katan profiled services +docker-compose-down-llm-katan: + @$(LOG_TARGET) + @echo "Stopping llm-katan profiled services..." 
+ @docker compose --profile llm-katan down + # Help target for Docker commands docker-help: @echo "Docker Make Targets:" @@ -152,7 +168,9 @@ docker-help: @echo " docker-compose-rebuild - Force rebuild then start" @echo " docker-compose-rebuild-testing - Force rebuild (testing profile)" @echo " docker-compose-rebuild-llm-katan - Force rebuild (llm-katan profile)" - @echo " docker-compose-down - Stop docker-compose services" + @echo " docker-compose-down - Stop ALL services (base + profiled) and remove orphans" + @echo " docker-compose-down-core - Stop core services only (keeps profiled running)" + @echo " docker-compose-down-llm-katan - Stop only llm-katan profiled services" @echo "" @echo "Environment Variables:" @echo " DOCKER_REGISTRY - Docker registry (default: ghcr.io/vllm-project/semantic-router)" From 5b7357e99e6759d0e3510db9ab394d762b8b561f Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Wed, 15 Oct 2025 18:43:11 +0800 Subject: [PATCH 3/7] add jaeger UI to dashboard Signed-off-by: JaredforReal --- dashboard/backend/main.go | 47 ++++++++++++ dashboard/frontend/src/App.tsx | 12 ++++ dashboard/frontend/src/components/Layout.tsx | 28 +++++--- dashboard/frontend/src/pages/TracingPage.tsx | 76 ++++++++++++++++++++ deploy/docker-compose/docker-compose.yml | 28 ++++---- tools/make/golang.mk | 34 ++++++++- 6 files changed, 201 insertions(+), 24 deletions(-) create mode 100644 dashboard/frontend/src/pages/TracingPage.tsx diff --git a/dashboard/backend/main.go b/dashboard/backend/main.go index 40abd391..72ded509 100644 --- a/dashboard/backend/main.go +++ b/dashboard/backend/main.go @@ -271,6 +271,7 @@ func main() { routerAPI := flag.String("router_api", env("TARGET_ROUTER_API_URL", "http://localhost:8080"), "Router API base URL") routerMetrics := flag.String("router_metrics", env("TARGET_ROUTER_METRICS_URL", "http://localhost:9190/metrics"), "Router metrics URL") openwebuiURL := flag.String("openwebui", env("TARGET_OPENWEBUI_URL", ""), "Open WebUI base URL") + 
jaegerURL := flag.String("jaeger", env("TARGET_JAEGER_URL", ""), "Jaeger base URL") flag.Parse() @@ -382,6 +383,31 @@ func main() { log.Printf("Warning: Prometheus URL not configured") } + // Jaeger proxy (optional) - expose full UI under /embedded/jaeger and its static assets under /static/ + if *jaegerURL != "" { + jp, err := newReverseProxy(*jaegerURL, "/embedded/jaeger", false) + if err != nil { + log.Fatalf("jaeger proxy error: %v", err) + } + // Jaeger UI (root UI under /embedded/jaeger) + mux.Handle("/embedded/jaeger", jp) + mux.Handle("/embedded/jaeger/", jp) + + // Jaeger static assets are typically served under /static/* from the same origin + // Provide a passthrough proxy without prefix stripping + jStatic, _ := newReverseProxy(*jaegerURL, "", false) + mux.Handle("/static/", jStatic) + + log.Printf("Jaeger proxy configured: %s; static assets proxied at /static/", *jaegerURL) + } else { + mux.HandleFunc("/embedded/jaeger/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusServiceUnavailable) + w.Write([]byte(`{"error":"Jaeger not configured","message":"TARGET_JAEGER_URL environment variable is not set"}`)) + }) + log.Printf("Info: Jaeger URL not configured (optional)") + } + // Open WebUI proxy (optional) if *openwebuiURL != "" { op, err := newReverseProxy(*openwebuiURL, "/embedded/openwebui", true) @@ -400,6 +426,24 @@ func main() { log.Printf("Info: Open WebUI not configured (optional)") } + // Jaeger proxy (optional) + if *jaegerURL != "" { + jp, err := newReverseProxy(*jaegerURL, "/embedded/jaeger", false) + if err != nil { + log.Fatalf("jaeger proxy error: %v", err) + } + mux.Handle("/embedded/jaeger", jp) + mux.Handle("/embedded/jaeger/", jp) + log.Printf("Jaeger proxy configured: %s", *jaegerURL) + } else { + mux.HandleFunc("/embedded/jaeger/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + 
w.WriteHeader(http.StatusServiceUnavailable) + w.Write([]byte(`{"error":"Jaeger not configured","message":"TARGET_JAEGER_URL environment variable is not set"}`)) + }) + log.Printf("Info: Jaeger URL not configured (optional)") + } + addr := ":" + *port log.Printf("Semantic Router Dashboard listening on %s", addr) log.Printf("Static dir: %s", *staticDir) @@ -409,6 +453,9 @@ func main() { if *promURL != "" { log.Printf("Prometheus: %s → /embedded/prometheus/", *promURL) } + if *jaegerURL != "" { + log.Printf("Jaeger: %s → /embedded/jaeger/", *jaegerURL) + } if *openwebuiURL != "" { log.Printf("OpenWebUI: %s → /embedded/openwebui/", *openwebuiURL) } diff --git a/dashboard/frontend/src/App.tsx b/dashboard/frontend/src/App.tsx index fb2a6c65..654b7d45 100644 --- a/dashboard/frontend/src/App.tsx +++ b/dashboard/frontend/src/App.tsx @@ -6,6 +6,7 @@ import MonitoringPage from './pages/MonitoringPage' import ConfigPage from './pages/ConfigPage' import PlaygroundPage from './pages/PlaygroundPage' import TopologyPage from './pages/TopologyPage' +import TracingPage from './pages/TracingPage' import { ConfigSection } from './components/ConfigNav' const App: React.FC = () => { @@ -117,6 +118,17 @@ const App: React.FC = () => { } /> + setConfigSection(section as ConfigSection)} + > + + + } + /> ) diff --git a/dashboard/frontend/src/components/Layout.tsx b/dashboard/frontend/src/components/Layout.tsx index 98cee2aa..9ecca8c0 100644 --- a/dashboard/frontend/src/components/Layout.tsx +++ b/dashboard/frontend/src/components/Layout.tsx @@ -55,14 +55,14 @@ const Layout: React.FC = ({ children, configSection, onConfigSectio {sidebarCollapsed ? ( // 折叠状态:箭头向右 - - + + ) : ( // 展开状态:箭头向左 - - + + )} @@ -98,12 +98,11 @@ const Layout: React.FC = ({ children, configSection, onConfigSectio ].map((section) => (