Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions config/config.tracing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Local Tracing Configuration (Jaeger + Always-On Sampling)
# This config is used by tools/tracing/docker-compose.tracing.yaml via CONFIG_FILE.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened. Values are unchanged; confirm the structure
# (in particular `api.batch_classification.metrics`) against the router's
# config schema.

bert_model:
  model_id: sentence-transformers/all-MiniLM-L12-v2
  threshold: 0.6
  use_cpu: true

semantic_cache:
  enabled: true
  backend_type: "memory"
  similarity_threshold: 0.8
  max_entries: 1000
  ttl_seconds: 3600
  eviction_policy: "fifo"

tools:
  enabled: true
  top_k: 3
  similarity_threshold: 0.2
  tools_db_path: "config/tools_db.json"
  fallback_to_empty: true

prompt_guard:
  enabled: true
  use_modernbert: true
  model_id: "models/jailbreak_classifier_modernbert-base_model"
  threshold: 0.7
  use_cpu: true
  jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

# Single backend endpoint; referenced by name from
# model_config.*.preferred_endpoints below.
vllm_endpoints:
  - name: "endpoint1"
    address: "127.0.0.1"
    port: 8000
    weight: 1

model_config:
  "openai/gpt-oss-20b":
    # Must match a key under reasoning_families.
    reasoning_family: "gpt-oss"
    preferred_endpoints: ["endpoint1"]
    pii_policy:
      allow_by_default: true

classifier:
  category_model:
    model_id: "models/category_classifier_modernbert-base_model"
    use_modernbert: true
    threshold: 0.6
    use_cpu: true
    category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
  pii_model:
    model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
    use_modernbert: true
    threshold: 0.7
    use_cpu: true
    pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

# Both categories score the same (only) model; reasoning is enabled for
# math and disabled for everything else.
categories:
  - name: math
    system_prompt: "You are a mathematics expert. Provide step-by-step solutions."
    model_scores:
      - model: openai/gpt-oss-20b
        score: 1.0
        use_reasoning: true
  - name: other
    system_prompt: "You are a helpful assistant."
    model_scores:
      - model: openai/gpt-oss-20b
        score: 0.7
        use_reasoning: false

default_model: openai/gpt-oss-20b

reasoning_families:
  gpt-oss:
    type: "reasoning_effort"
    parameter: "reasoning_effort"

default_reasoning_effort: high

api:
  batch_classification:
    max_batch_size: 100
    concurrency_threshold: 5
    max_concurrency: 8
    # NOTE(review): assumed to be nested under batch_classification - confirm.
    metrics:
      enabled: true

observability:
  tracing:
    enabled: true
    provider: "opentelemetry"
    exporter:
      type: "otlp"
      endpoint: "jaeger:4317"  # Jaeger gRPC OTLP endpoint inside compose network
      insecure: true
    sampling:
      type: "always_on"  # Always sample in local/dev for easy debugging
      # NOTE(review): presumably only consulted for probabilistic sampling;
      # kept at 1.0 to match always_on - confirm.
      rate: 1.0
    resource:
      service_name: "vllm-semantic-router"
      service_version: "dev"
      deployment_environment: "local"
32 changes: 16 additions & 16 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ bert_model:

semantic_cache:
enabled: true
backend_type: "memory" # Options: "memory" or "milvus"
backend_type: "memory" # Options: "memory" or "milvus"
similarity_threshold: 0.8
max_entries: 1000 # Only applies to memory backend
max_entries: 1000 # Only applies to memory backend
ttl_seconds: 3600
eviction_policy: "fifo"
eviction_policy: "fifo"

tools:
enabled: true
Expand Down Expand Up @@ -65,7 +65,7 @@ categories:
model_scores:
- model: qwen3
score: 0.7
use_reasoning: false # Business performs better without reasoning
use_reasoning: false # Business performs better without reasoning
- name: law
system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
model_scores:
Expand All @@ -89,7 +89,7 @@ categories:
model_scores:
- model: qwen3
score: 0.6
use_reasoning: true # Enable reasoning for complex chemistry
use_reasoning: true # Enable reasoning for complex chemistry
- name: history
system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
model_scores:
Expand Down Expand Up @@ -119,13 +119,13 @@ categories:
model_scores:
- model: qwen3
score: 1.0
use_reasoning: true # Enable reasoning for complex math
use_reasoning: true # Enable reasoning for complex math
- name: physics
system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
model_scores:
- model: qwen3
score: 0.7
use_reasoning: true # Enable reasoning for physics
use_reasoning: true # Enable reasoning for physics
- name: computer science
system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
model_scores:
Expand Down Expand Up @@ -178,23 +178,23 @@ api:
detailed_goroutine_tracking: true
high_resolution_timing: false
sample_rate: 1.0
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
duration_buckets:
[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

# Observability Configuration
observability:
tracing:
enabled: false # Enable distributed tracing (default: false)
provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry
enabled: true # Enable distributed tracing for docker-compose stack
provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry
exporter:
type: "stdout" # Exporter: otlp, jaeger, zipkin, stdout
endpoint: "localhost:4317" # OTLP endpoint (when type: otlp)
insecure: true # Use insecure connection (no TLS)
type: "otlp" # Export spans to Jaeger (via OTLP gRPC)
endpoint: "jaeger:4317" # Jaeger collector inside compose network
insecure: true # Use insecure connection (no TLS)
sampling:
type: "always_on" # Sampling: always_on, always_off, probabilistic
rate: 1.0 # Sampling rate for probabilistic (0.0-1.0)
type: "always_on" # Sampling: always_on, always_off, probabilistic
rate: 1.0 # Sampling rate for probabilistic (0.0-1.0)
resource:
service_name: "vllm-semantic-router"
service_version: "v0.1.0"
deployment_environment: "development"

47 changes: 47 additions & 0 deletions dashboard/backend/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ func main() {
routerAPI := flag.String("router_api", env("TARGET_ROUTER_API_URL", "http://localhost:8080"), "Router API base URL")
routerMetrics := flag.String("router_metrics", env("TARGET_ROUTER_METRICS_URL", "http://localhost:9190/metrics"), "Router metrics URL")
openwebuiURL := flag.String("openwebui", env("TARGET_OPENWEBUI_URL", ""), "Open WebUI base URL")
jaegerURL := flag.String("jaeger", env("TARGET_JAEGER_URL", ""), "Jaeger base URL")

flag.Parse()

Expand Down Expand Up @@ -382,6 +383,31 @@ func main() {
log.Printf("Warning: Prometheus URL not configured")
}

// Jaeger proxy (optional).
//
// When a Jaeger base URL is configured, the Jaeger UI is exposed under
// /embedded/jaeger (prefix passed to newReverseProxy) and /static/ is proxied
// to the same upstream with an empty prefix. When it is not configured, a JSON
// 503 handler is installed on /embedded/jaeger/ instead so callers get an
// explicit "not configured" answer.
//
// NOTE(review): the /static/ route is a catch-all on this mux; verify it does
// not shadow static assets the dashboard itself serves from the same path.
if *jaegerURL != "" {
	jaegerUI, err := newReverseProxy(*jaegerURL, "/embedded/jaeger", false)
	if err != nil {
		log.Fatalf("jaeger proxy error: %v", err)
	}
	// Register both forms so the path with and without a trailing slash
	// reaches the proxy.
	mux.Handle("/embedded/jaeger", jaegerUI)
	mux.Handle("/embedded/jaeger/", jaegerUI)

	// Jaeger static assets are typically served under /static/* from the same
	// origin, so pass them through without a prefix. The error is checked
	// rather than discarded so a failed constructor never registers an
	// unusable handler.
	jaegerStatic, err := newReverseProxy(*jaegerURL, "", false)
	if err != nil {
		log.Fatalf("jaeger static proxy error: %v", err)
	}
	mux.Handle("/static/", jaegerStatic)

	log.Printf("Jaeger proxy configured: %s; static assets proxied at /static/", *jaegerURL)
} else {
	mux.HandleFunc("/embedded/jaeger/", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusServiceUnavailable)
		w.Write([]byte(`{"error":"Jaeger not configured","message":"TARGET_JAEGER_URL environment variable is not set"}`))
	})
	log.Printf("Info: Jaeger URL not configured (optional)")
}

// Open WebUI proxy (optional)
if *openwebuiURL != "" {
op, err := newReverseProxy(*openwebuiURL, "/embedded/openwebui", true)
Expand All @@ -400,6 +426,24 @@ func main() {
log.Printf("Info: Open WebUI not configured (optional)")
}

// Jaeger proxy (optional)
if *jaegerURL != "" {
jp, err := newReverseProxy(*jaegerURL, "/embedded/jaeger", false)
if err != nil {
log.Fatalf("jaeger proxy error: %v", err)
}
mux.Handle("/embedded/jaeger", jp)
mux.Handle("/embedded/jaeger/", jp)
log.Printf("Jaeger proxy configured: %s", *jaegerURL)
} else {
mux.HandleFunc("/embedded/jaeger/", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusServiceUnavailable)
w.Write([]byte(`{"error":"Jaeger not configured","message":"TARGET_JAEGER_URL environment variable is not set"}`))
})
log.Printf("Info: Jaeger URL not configured (optional)")
}

addr := ":" + *port
log.Printf("Semantic Router Dashboard listening on %s", addr)
log.Printf("Static dir: %s", *staticDir)
Expand All @@ -409,6 +453,9 @@ func main() {
if *promURL != "" {
log.Printf("Prometheus: %s → /embedded/prometheus/", *promURL)
}
if *jaegerURL != "" {
log.Printf("Jaeger: %s → /embedded/jaeger/", *jaegerURL)
}
if *openwebuiURL != "" {
log.Printf("OpenWebUI: %s → /embedded/openwebui/", *openwebuiURL)
}
Expand Down
12 changes: 12 additions & 0 deletions dashboard/frontend/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import MonitoringPage from './pages/MonitoringPage'
import ConfigPage from './pages/ConfigPage'
import PlaygroundPage from './pages/PlaygroundPage'
import TopologyPage from './pages/TopologyPage'
import TracingPage from './pages/TracingPage'
import { ConfigSection } from './components/ConfigNav'

const App: React.FC = () => {
Expand Down Expand Up @@ -117,6 +118,17 @@ const App: React.FC = () => {
</Layout>
}
/>
<Route
path="/tracing"
element={
<Layout
configSection={configSection}
onConfigSectionChange={(section) => setConfigSection(section as ConfigSection)}
>
<TracingPage />
</Layout>
}
/>
</Routes>
</BrowserRouter>
)
Expand Down
28 changes: 19 additions & 9 deletions dashboard/frontend/src/components/Layout.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,14 @@ const Layout: React.FC<LayoutProps> = ({ children, configSection, onConfigSectio
{sidebarCollapsed ? (
// Collapsed state: arrow points right
<svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M14 10L18 10M18 10L16 8M18 10L16 12" stroke="currentColor" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round"/>
<path d="M2 5H10M2 10H10M2 15H10" stroke="currentColor" strokeWidth="1.5" strokeLinecap="round"/>
<path d="M14 10L18 10M18 10L16 8M18 10L16 12" stroke="currentColor" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
<path d="M2 5H10M2 10H10M2 15H10" stroke="currentColor" strokeWidth="1.5" strokeLinecap="round" />
</svg>
) : (
// Expanded state: arrow points left
<svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M6 10L2 10M2 10L4 8M2 10L4 12" stroke="currentColor" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round"/>
<path d="M10 5H18M10 10H18M10 15H18" stroke="currentColor" strokeWidth="1.5" strokeLinecap="round"/>
<path d="M6 10L2 10M2 10L4 8M2 10L4 12" stroke="currentColor" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
<path d="M10 5H18M10 10H18M10 15H18" stroke="currentColor" strokeWidth="1.5" strokeLinecap="round" />
</svg>
)}
</button>
Expand Down Expand Up @@ -98,12 +98,11 @@ const Layout: React.FC<LayoutProps> = ({ children, configSection, onConfigSectio
].map((section) => (
<button
key={section.id}
className={`${styles.navLink} ${
(section.id === 'topology' && location.pathname === '/topology') ||
(isConfigPage && configSection === section.id)
className={`${styles.navLink} ${(section.id === 'topology' && location.pathname === '/topology') ||
(isConfigPage && configSection === section.id)
? styles.navLinkActive
: ''
}`}
}`}
onClick={() => {
if (section.id === 'topology') {
navigate('/topology')
Expand Down Expand Up @@ -131,6 +130,17 @@ const Layout: React.FC<LayoutProps> = ({ children, configSection, onConfigSectio
<span className={styles.navIcon}>📊</span>
{!sidebarCollapsed && <span className={styles.navText}>Monitoring</span>}
</NavLink>

<NavLink
to="/tracing"
className={({ isActive }) =>
isActive ? `${styles.navLink} ${styles.navLinkActive}` : styles.navLink
}
title="Tracing"
>
<span className={styles.navIcon}>🔎</span>
{!sidebarCollapsed && <span className={styles.navText}>Tracing</span>}
</NavLink>
</nav>
<div className={styles.sidebarFooter}>
<button
Expand All @@ -152,7 +162,7 @@ const Layout: React.FC<LayoutProps> = ({ children, configSection, onConfigSectio
title="GitHub Repository"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor">
<path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/>
<path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z" />
</svg>
</a>
<a
Expand Down
Loading
Loading