diff --git a/README.md b/README.md index de3f7bf9..c5780cc3 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,18 @@ Detect if the prompt is a jailbreak prompt, avoiding sending jailbreak prompts t Cache the semantic representation of the prompt so as to reduce the number of prompt tokens and improve the overall inference latency. +### Distributed Tracing 🔍 + +OpenTelemetry distributed tracing provides fine-grained visibility into the request processing pipeline: + +- **Request Flow Tracing**: Track requests through classification, security checks, caching, and routing +- **Performance Analysis**: Identify bottlenecks with detailed timing for each operation +- **Security Monitoring**: Trace PII detection and jailbreak prevention operations +- **Routing Decisions**: Understand why specific models were selected +- **OpenTelemetry Standard**: Industry-standard tracing with support for Jaeger, Tempo, and other OTLP backends + +See the [Distributed Tracing Guide](https://vllm-semantic-router.com/docs/tutorials/observability/distributed-tracing/) for complete setup instructions. 
+ ## Documentation 📖 For comprehensive documentation including detailed setup instructions, architecture guides, and API references, visit: @@ -74,6 +86,7 @@ The documentation includes: - **[System Architecture](https://vllm-semantic-router.com/docs/overview/architecture/system-architecture/)** - Technical deep dive - **[Model Training](https://vllm-semantic-router.com/docs/training/training-overview/)** - How classification models work - **[API Reference](https://vllm-semantic-router.com/docs/api/router/)** - Complete API documentation +- **[Distributed Tracing](https://vllm-semantic-router.com/docs/tutorials/observability/distributed-tracing/)** - Observability and debugging guide ## Community 👋 diff --git a/config/config.development.yaml b/config/config.development.yaml new file mode 100644 index 00000000..3bec3828 --- /dev/null +++ b/config/config.development.yaml @@ -0,0 +1,97 @@ +# Development Configuration Example with Stdout Tracing +# This configuration enables distributed tracing with stdout exporter +# for local development and debugging. 
+ +bert_model: + model_id: sentence-transformers/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: true + +semantic_cache: + enabled: true + backend_type: "memory" + similarity_threshold: 0.8 + max_entries: 100 + ttl_seconds: 600 + eviction_policy: "fifo" + +tools: + enabled: false + top_k: 3 + similarity_threshold: 0.2 + tools_db_path: "config/tools_db.json" + fallback_to_empty: true + +prompt_guard: + enabled: false + +vllm_endpoints: + - name: "local-endpoint" + address: "127.0.0.1" + port: 8000 + models: + - "test-model" + weight: 1 + +model_config: + "test-model": + pii_policy: + allow_by_default: true + +classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + +categories: + - name: test + system_prompt: "You are a test assistant." + model_scores: + - model: test-model + score: 1.0 + use_reasoning: false + +default_model: test-model + +api: + batch_classification: + max_batch_size: 10 + metrics: + enabled: true + +# Observability Configuration - Development with Stdout +observability: + tracing: + # Enable tracing for development/debugging + enabled: true + + # OpenTelemetry provider + provider: "opentelemetry" + + exporter: + # Stdout exporter prints traces to console (great for debugging) + type: "stdout" + + # No endpoint needed for stdout + # endpoint: "" + # insecure: true + + sampling: + # Always sample in development to see all traces + type: "always_on" + + # Rate not used for always_on + # rate: 1.0 + + resource: + # Service name for trace identification + service_name: "vllm-semantic-router-dev" + + # Version for development + service_version: "dev" + + # Environment identifier + deployment_environment: "development" diff --git a/config/config.production.yaml b/config/config.production.yaml new file mode 100644 index 00000000..07258956 --- /dev/null +++ 
b/config/config.production.yaml @@ -0,0 +1,132 @@ +# Production Configuration Example with OTLP Tracing +# This configuration enables distributed tracing with OpenTelemetry OTLP exporter +# for production deployment with Jaeger or other OTLP-compatible backends. + +bert_model: + model_id: sentence-transformers/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: true + +semantic_cache: + enabled: true + backend_type: "memory" + similarity_threshold: 0.8 + max_entries: 1000 + ttl_seconds: 3600 + eviction_policy: "fifo" + +tools: + enabled: true + top_k: 3 + similarity_threshold: 0.2 + tools_db_path: "config/tools_db.json" + fallback_to_empty: true + +prompt_guard: + enabled: true + use_modernbert: true + model_id: "models/jailbreak_classifier_modernbert-base_model" + threshold: 0.7 + use_cpu: true + jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json" + +vllm_endpoints: + - name: "endpoint1" + address: "127.0.0.1" + port: 8000 + models: + - "openai/gpt-oss-20b" + weight: 1 + +model_config: + "openai/gpt-oss-20b": + reasoning_family: "gpt-oss" + preferred_endpoints: ["endpoint1"] + pii_policy: + allow_by_default: true + +classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + pii_model: + model_id: "models/pii_classifier_modernbert-base_presidio_token_model" + use_modernbert: true + threshold: 0.7 + use_cpu: true + pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" + +categories: + - name: math + system_prompt: "You are a mathematics expert. Provide step-by-step solutions." + model_scores: + - model: openai/gpt-oss-20b + score: 1.0 + use_reasoning: true + - name: other + system_prompt: "You are a helpful assistant." 
+ model_scores: + - model: openai/gpt-oss-20b + score: 0.7 + use_reasoning: false + +default_model: openai/gpt-oss-20b + +reasoning_families: + gpt-oss: + type: "reasoning_effort" + parameter: "reasoning_effort" + +default_reasoning_effort: high + +api: + batch_classification: + max_batch_size: 100 + concurrency_threshold: 5 + max_concurrency: 8 + metrics: + enabled: true + +# Observability Configuration - Production with OTLP +observability: + tracing: + # Enable distributed tracing for production monitoring + enabled: true + + # OpenTelemetry provider (standard implementation) + provider: "opentelemetry" + + exporter: + # OTLP exporter for Jaeger, Tempo, or other OTLP backends + type: "otlp" + + # Jaeger OTLP endpoint (default: 4317 for gRPC) + # For Jaeger: localhost:4317 + # For Grafana Tempo: tempo:4317 + # For Datadog: trace-agent:4317 + endpoint: "jaeger:4317" + + # Use insecure connection (set to false in production with TLS) + insecure: true + + sampling: + # Probabilistic sampling for production (reduces overhead) + type: "probabilistic" + + # Sample 10% of requests (adjust based on traffic volume) + # Higher rates (0.5-1.0) for low traffic + # Lower rates (0.01-0.1) for high traffic + rate: 0.1 + + resource: + # Service name for trace identification + service_name: "vllm-semantic-router" + + # Version for tracking deployments + service_version: "v0.1.0" + + # Environment identifier + deployment_environment: "production" diff --git a/config/config.yaml b/config/config.yaml index 29f4eea8..9b814cdc 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -182,3 +182,21 @@ api: sample_rate: 1.0 duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] + +# Observability Configuration +observability: + tracing: + enabled: false # Enable distributed tracing (default: false) + provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry + exporter: + type: "stdout" 
# Exporter: otlp, jaeger, zipkin, stdout + endpoint: "localhost:4317" # OTLP endpoint (when type: otlp) + insecure: true # Use insecure connection (no TLS) + sampling: + type: "always_on" # Sampling: always_on, always_off, probabilistic + rate: 1.0 # Sampling rate for probabilistic (0.0-1.0) + resource: + service_name: "vllm-semantic-router" + service_version: "v0.1.0" + deployment_environment: "development" + diff --git a/deploy/docker-compose.tracing.yaml b/deploy/docker-compose.tracing.yaml new file mode 100644 index 00000000..9522221f --- /dev/null +++ b/deploy/docker-compose.tracing.yaml @@ -0,0 +1,55 @@ +version: '3.8' + +services: + # Jaeger all-in-one for distributed tracing + jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger + ports: + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + - "16686:16686" # Jaeger UI + - "14268:14268" # Jaeger collector + environment: + - COLLECTOR_OTLP_ENABLED=true + networks: + - router-network + + # Semantic Router with tracing enabled + semantic-router: + image: vllm-semantic-router:latest + container_name: semantic-router + depends_on: + - jaeger + ports: + - "50051:50051" # gRPC ExtProc + - "8080:8080" # Classification API + - "9190:9190" # Metrics + volumes: + - ./config:/config + environment: + - CONFIG_PATH=/config/config.tracing.yaml + networks: + - router-network + + # Grafana for visualization + grafana: + image: grafana/grafana:latest + container_name: grafana + ports: + - "3000:3000" + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning + - grafana-storage:/var/lib/grafana + networks: + - router-network + +networks: + router-network: + driver: bridge + +volumes: + grafana-storage: diff --git a/deploy/tracing/README.md b/deploy/tracing/README.md new file mode 100644 index 00000000..51927f30 --- /dev/null +++ b/deploy/tracing/README.md @@ -0,0 +1,155 @@ +# Distributed Tracing Deployment 
Example + +This directory contains an example deployment configuration for testing distributed tracing with Jaeger. + +## Quick Start + +1. **Start the services**: + +```bash +docker-compose -f ../docker-compose.tracing.yaml up -d +``` + +2. **Access the UIs**: + +- Jaeger UI: http://localhost:16686 +- Grafana: http://localhost:3000 +- Router API: http://localhost:8080 + +3. **Send test requests**: + +```bash +# Example request +curl -X POST http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "auto", + "messages": [{"role": "user", "content": "What is 2+2?"}] + }' +``` + +4. **View traces in Jaeger**: + +- Navigate to http://localhost:16686 +- Select service: `vllm-semantic-router` +- Click "Find Traces" + +## Configuration + +The router is configured with: + +```yaml +observability: + tracing: + enabled: true + provider: "opentelemetry" + exporter: + type: "otlp" + endpoint: "jaeger:4317" + insecure: true + sampling: + type: "always_on" + resource: + service_name: "vllm-semantic-router" +``` + +## Services + +### Jaeger + +- **OTLP gRPC**: Port 4317 +- **OTLP HTTP**: Port 4318 +- **Jaeger UI**: Port 16686 +- **Collector**: Port 14268 + +### Semantic Router + +- **gRPC ExtProc**: Port 50051 +- **Classification API**: Port 8080 +- **Metrics**: Port 9190 + +### Grafana + +- **Web UI**: Port 3000 +- Default credentials: admin/admin +- Pre-configured with Jaeger data source + +## Trace Examples + +### Request Flow + +``` +semantic_router.request.received [2ms] +├─ semantic_router.classification [45ms] +│ └─ category: math, confidence: 0.95 +├─ semantic_router.security.jailbreak_detection [12ms] +│ └─ jailbreak.detected: false +├─ semantic_router.cache.lookup [3ms] +│ └─ cache.hit: false +├─ semantic_router.routing.decision [5ms] +│ └─ selected_model: gpt-4, reasoning: true +└─ semantic_router.backend.selection [2ms] + └─ endpoint: endpoint1 +``` + +### Key Attributes + +- `request.id`: Unique request identifier +- 
`category.name`: Classified category +- `routing.selected_model`: Selected model +- `reasoning.enabled`: Reasoning mode +- `cache.hit`: Cache hit status + +## Stopping Services + +```bash +docker-compose -f ../docker-compose.tracing.yaml down +``` + +To remove volumes: + +```bash +docker-compose -f ../docker-compose.tracing.yaml down -v +``` + +## Troubleshooting + +### Traces not appearing + +1. Check Jaeger is running: + +```bash +curl http://localhost:16686 +``` + +2. Verify router can connect to Jaeger: + +```bash +docker logs semantic-router | grep -i tracing +``` + +3. Check for initialization message: + +``` +Distributed tracing initialized (provider: opentelemetry, exporter: otlp) +``` + +### Router fails to start + +1. Check configuration: + +```bash +docker logs semantic-router +``` + +2. Verify Jaeger is ready: + +```bash +docker logs jaeger +``` + +## Next Steps + +- [Full Tracing Documentation](../../website/docs/tutorials/observability/distributed-tracing.md) +- [Quick Start Guide](../../website/docs/tutorials/observability/tracing-quickstart.md) +- [Configuration Reference](../../config/config.production.yaml) diff --git a/src/semantic-router/cmd/main.go b/src/semantic-router/cmd/main.go index f8a0fb67..e41dbfcf 100644 --- a/src/semantic-router/cmd/main.go +++ b/src/semantic-router/cmd/main.go @@ -1,13 +1,18 @@ package main import ( + "context" "flag" "fmt" "net/http" "os" + "os/signal" + "syscall" + "time" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/api" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" ) @@ -37,6 +42,56 @@ func main() { observability.Fatalf("Config file not found: %s", *configPath) } + // Load configuration to initialize tracing + cfg, err := config.ParseConfigFile(*configPath) 
+ if err != nil { + observability.Fatalf("Failed to load config: %v", err) + } + + // Initialize distributed tracing if enabled + ctx := context.Background() + if cfg.Observability.Tracing.Enabled { + tracingCfg := observability.TracingConfig{ + Enabled: cfg.Observability.Tracing.Enabled, + Provider: cfg.Observability.Tracing.Provider, + ExporterType: cfg.Observability.Tracing.Exporter.Type, + ExporterEndpoint: cfg.Observability.Tracing.Exporter.Endpoint, + ExporterInsecure: cfg.Observability.Tracing.Exporter.Insecure, + SamplingType: cfg.Observability.Tracing.Sampling.Type, + SamplingRate: cfg.Observability.Tracing.Sampling.Rate, + ServiceName: cfg.Observability.Tracing.Resource.ServiceName, + ServiceVersion: cfg.Observability.Tracing.Resource.ServiceVersion, + DeploymentEnvironment: cfg.Observability.Tracing.Resource.DeploymentEnvironment, + } + if err := observability.InitTracing(ctx, tracingCfg); err != nil { + observability.Warnf("Failed to initialize tracing: %v", err) + } + + // Set up graceful shutdown for tracing + defer func() { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownTracing(shutdownCtx); err != nil { + observability.Errorf("Failed to shutdown tracing: %v", err) + } + }() + } + + // Set up signal handling for graceful shutdown + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) + + go func() { + <-sigChan + observability.Infof("Received shutdown signal, cleaning up...") + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observability.ShutdownTracing(shutdownCtx); err != nil { + observability.Errorf("Failed to shutdown tracing: %v", err) + } + os.Exit(0) + }() + // Start metrics server go func() { http.Handle("/metrics", promhttp.Handler()) diff --git a/src/semantic-router/go.mod b/src/semantic-router/go.mod index 432fd110..20bf1da0 100644 --- 
a/src/semantic-router/go.mod +++ b/src/semantic-router/go.mod @@ -20,18 +20,24 @@ require ( github.com/openai/openai-go v1.12.0 github.com/prometheus/client_golang v1.23.0 github.com/prometheus/client_model v0.6.2 - github.com/stretchr/testify v1.10.0 + github.com/stretchr/testify v1.11.1 github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000 + go.opentelemetry.io/otel v1.38.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 + go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 + go.opentelemetry.io/otel/sdk v1.38.0 + go.opentelemetry.io/otel/trace v1.38.0 go.uber.org/zap v1.27.0 - google.golang.org/grpc v1.71.1 + google.golang.org/grpc v1.75.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/apimachinery v0.31.4 ) require ( github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 // indirect + github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect github.com/cockroachdb/errors v1.9.1 // indirect github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect github.com/cockroachdb/redact v1.1.3 // indirect @@ -39,14 +45,17 @@ require ( github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/getsentry/sentry-go v0.12.0 // indirect - github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + 
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect @@ -59,21 +68,26 @@ require ( github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/common v0.65.0 // indirect github.com/prometheus/procfs v0.16.1 // indirect - github.com/rogpeppe/go-internal v1.12.0 // indirect + github.com/rogpeppe/go-internal v1.13.1 // indirect github.com/tidwall/gjson v1.14.4 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect github.com/tidwall/sjson v1.2.5 // indirect github.com/x448/float16 v0.8.4 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/proto/otlp v1.7.1 // indirect go.uber.org/automaxprocs v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/net v0.41.0 // indirect - golang.org/x/sync v0.15.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/text v0.26.0 // indirect - golang.org/x/tools v0.33.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect - google.golang.org/protobuf v1.36.6 // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/sync v0.16.0 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.org/x/text v0.28.0 // indirect + golang.org/x/tools v0.35.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 // indirect + google.golang.org/protobuf v1.36.9 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect diff --git a/src/semantic-router/go.sum b/src/semantic-router/go.sum index 
45534e65..af77d0b8 100644 --- a/src/semantic-router/go.sum +++ b/src/semantic-router/go.sum @@ -10,14 +10,16 @@ github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5 github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 h1:boJj011Hh+874zpIySeApCX4GeOjPl9qhRF3QuIZq+Q= -github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls= +github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= github.com/cockroachdb/errors v1.9.1 h1:yFVvsI0VxmRShfawbt/laCIDy/mtTqqnvoNgiy5bEV8= github.com/cockroachdb/errors v1.9.1/go.mod h1:2sxOtL2WIc096WSZqZ5h8fa17rdDq9HZOZLBCor4mBk= @@ 
-68,8 +70,9 @@ github.com/go-faker/faker/v4 v4.1.0 h1:ffuWmpDrducIUOO0QSKSF5Q2dxAht+dhsT9FvVHhP github.com/go-faker/faker/v4 v4.1.0/go.mod h1:uuNc0PSRxF8nMgjGrrrU4Nw5cF30Jc6Kd0/FUTTYbhg= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= @@ -126,6 +129,8 @@ github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORR github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= @@ -237,8 +242,8 @@ github.com/prometheus/procfs 
v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlT github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g= @@ -264,8 +269,8 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM= github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= @@ 
-301,16 +306,24 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= -go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= -go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= -go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= -go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A= -go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU= -go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk= -go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= -go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= -go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 
h1:kJxSDN4SgWWTjG/hPp3O7LCGLcHXFlvS2/FFOrwL+SE= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0/go.mod h1:mgIOzS7iZeKJdeB8/NYHrJ48fdGc71Llo5bJ1J4DWUE= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= +go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= @@ -356,8 +369,8 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211008194852-3b03d305991f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= 
+golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -366,8 +379,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8=
-golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
+golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -390,8 +403,8 @@ golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
-golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
+golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
@@ -399,8 +412,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M=
-golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA=
+golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
+golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
 golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -415,12 +428,14 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc=
-golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI=
+golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0=
+golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
+gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
@@ -429,8 +444,10 @@ google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98
 google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
 google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
 google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f h1:OxYkA3wjPsZyBylwymxSHa7ViiW1Sml4ToBrncvFehI=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:+2Yz8+CLJbIfL9z73EW45avw8Lmge3xVElCP9zEKi50=
+google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 h1:8XJ4pajGwOlasW+L13MnEGA8W4115jJySQtVfS2/IBU=
+google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4/go.mod h1:NnuHhy+bxcg30o7FnVAZbXsPHUDQ9qKWAQKCD7VxFtk=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 h1:V1jCN2HBa8sySkR5vLcCSqJSTMv093Rw9EJefhQGP7M=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9/go.mod h1:HSkG/KdJWusxU1F6CNrwNDjBMgisKxGnc5dAZfT0mjQ=
 google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
 google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
@@ -438,8 +455,8 @@ google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQ
 google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
 google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
 google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=
-google.golang.org/grpc v1.71.1 h1:ffsFWr7ygTUscGPI0KKK6TLrGz0476KUvvsbqWK0rPI=
-google.golang.org/grpc v1.71.1/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec=
+google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4=
+google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ=
 google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f h1:rqzndB2lIQGivcXdTuY3Y9NBvr70X+y77woofSRluec=
 google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f/go.mod h1:gxndsbNG1n4TZcHGgsYEfVGnTxqfEdfiDv6/DADXX9o=
 google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
@@ -453,8 +470,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD
 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
-google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
+google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=
+google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go
index 78edc546..6720c6a0 100644
--- a/src/semantic-router/pkg/config/config.go
+++ b/src/semantic-router/pkg/config/config.go
@@ -87,6 +87,9 @@ type RouterConfig struct {
 	// API configuration for classification endpoints
 	API APIConfig `yaml:"api"`
+
+	// Observability configuration for tracing, metrics, and logging
+	Observability ObservabilityConfig `yaml:"observability"`
 }
 
 // APIConfig represents configuration for API endpoints
@@ -98,6 +101,63 @@ type APIConfig struct {
 	} `yaml:"batch_classification"`
 }
+
+// ObservabilityConfig represents configuration for observability features
+type ObservabilityConfig struct {
+	// Tracing configuration for distributed tracing
+	Tracing TracingConfig `yaml:"tracing"`
+}
+
+// TracingConfig represents configuration for distributed tracing
+type TracingConfig struct {
+	// Enable distributed tracing
+	Enabled bool `yaml:"enabled"`
+
+	// Provider type (opentelemetry, openinference, openllmetry)
+	Provider string `yaml:"provider,omitempty"`
+
+	// Exporter configuration
+	Exporter TracingExporterConfig `yaml:"exporter"`
+
+	// Sampling configuration
+	Sampling TracingSamplingConfig `yaml:"sampling"`
+
+	// Resource attributes
+	Resource TracingResourceConfig `yaml:"resource"`
+}
+
+// TracingExporterConfig represents exporter configuration
+type TracingExporterConfig struct {
+	// Exporter type (otlp, jaeger, zipkin, stdout)
+	Type string `yaml:"type"`
+
+	// Endpoint for the exporter (e.g., localhost:4317 for OTLP)
+	Endpoint string `yaml:"endpoint,omitempty"`
+
+	// Use insecure connection (no TLS)
+	Insecure bool `yaml:"insecure,omitempty"`
+}
+
+// TracingSamplingConfig represents sampling configuration
+type TracingSamplingConfig struct {
+	// Sampling type (always_on, always_off, probabilistic)
+	Type string `yaml:"type"`
+
+	// Sampling rate for probabilistic sampling (0.0 to 1.0)
+	Rate float64 `yaml:"rate,omitempty"`
+}
+
+// TracingResourceConfig represents resource attributes
+type TracingResourceConfig struct {
+	// Service name
+	ServiceName string `yaml:"service_name"`
+
+	// Service version
+	ServiceVersion string `yaml:"service_version,omitempty"`
+
+	// Deployment environment
+	DeploymentEnvironment string `yaml:"deployment_environment,omitempty"`
+}
+
 // BatchClassificationMetricsConfig represents configuration for batch classification metrics
 type BatchClassificationMetricsConfig struct {
 	// Sample rate for metrics collection (0.0-1.0, 1.0 means collect all metrics)
diff --git a/src/semantic-router/pkg/extproc/request_handler.go b/src/semantic-router/pkg/extproc/request_handler.go
index 46490ff5..d8cb4336 100644
--- a/src/semantic-router/pkg/extproc/request_handler.go
+++ b/src/semantic-router/pkg/extproc/request_handler.go
@@ -1,6 +1,7 @@
 package extproc
 
 import (
+	"context"
 	"encoding/json"
 	"strings"
 	"time"
@@ -9,6 +10,8 @@ import (
 	ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
 	typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
 	"github.com/openai/openai-go"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
 
@@ -250,6 +253,9 @@ type RequestContext struct {
 	VSRSelectedModel        string // The model selected by VSR
 	VSRCacheHit             bool   // Whether this request hit the cache
 	VSRInjectedSystemPrompt bool   // Whether a system prompt was injected into the request
+
+	// Tracing context
+	TraceContext context.Context // OpenTelemetry trace context for span propagation
 }
 
 // handleRequestHeaders processes the request headers
@@ -258,6 +264,26 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_Reques
 	ctx.StartTime = time.Now()
 	observability.Infof("Received request headers")
 
+	// Initialize trace context from incoming headers
+	baseCtx := context.Background()
+	headerMap := make(map[string]string)
+	for _, h := range v.RequestHeaders.Headers.Headers {
+		headerValue := h.Value
+		if headerValue == "" && len(h.RawValue) > 0 {
+			headerValue = string(h.RawValue)
+		}
+		headerMap[h.Key] = headerValue
+	}
+
+	// Extract trace context from headers (if present)
+	ctx.TraceContext = observability.ExtractTraceContext(baseCtx, headerMap)
+
+	// Start root span for the request
+	spanCtx, span := observability.StartSpan(ctx.TraceContext, observability.SpanRequestReceived,
+		trace.WithSpanKind(trace.SpanKindServer))
+	ctx.TraceContext = spanCtx
+	defer span.End()
+
 	// Store headers for later use
 	headers := v.RequestHeaders.Headers
 	for _, h := range headers.Headers {
@@ -275,6 +301,18 @@
 		}
 	}
+	// Set request metadata on span
+	if ctx.RequestID != "" {
+		observability.SetSpanAttributes(span,
+			attribute.String(observability.AttrRequestID, ctx.RequestID))
+	}
+
+	method := ctx.Headers[":method"]
+	path := ctx.Headers[":path"]
+	observability.SetSpanAttributes(span,
+		attribute.String(observability.AttrHTTPMethod, method),
+		attribute.String(observability.AttrHTTPPath, path))
+
 	// Detect if the client expects a streaming response (SSE)
 	if accept, ok := ctx.Headers["accept"]; ok {
 		if strings.Contains(strings.ToLower(accept), "text/event-stream") {
@@ -284,9 +322,6 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_Reques
 	}
 
 	// Check if this is a GET request to /v1/models
-	method := ctx.Headers[":method"]
-	path := ctx.Headers[":path"]
-
 	if method == "GET" && strings.HasPrefix(path, "/v1/models") {
 		observability.Infof("Handling /v1/models request with path: %s", path)
 		return r.handleModelsRequest(path)
@@ -341,6 +376,14 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBo
 	originalModel := string(openAIRequest.Model)
 	observability.Infof("Original model: %s", originalModel)
 
+	// Set model on span
+	if ctx.TraceContext != nil {
+		_, span := observability.StartSpan(ctx.TraceContext, "parse_request")
+		observability.SetSpanAttributes(span,
+			attribute.String(observability.AttrOriginalModel, originalModel))
+		span.End()
+	}
+
 	// Record the initial request to this model (count all requests)
 	metrics.RecordModelRequest(originalModel)
 	// Also set the model on context early so error metrics can label it
@@ -372,9 +415,20 @@
 	// Perform jailbreak detection on all message content
 	if r.Classifier.IsJailbreakEnabled() {
+		// Start jailbreak detection span
+		spanCtx, span := observability.StartSpan(ctx.TraceContext, observability.SpanJailbreakDetection)
+		defer span.End()
+
+		startTime := time.Now()
 		hasJailbreak, jailbreakDetections, err := r.Classifier.AnalyzeContentForJailbreak(allContent)
+		detectionTime := time.Since(startTime).Milliseconds()
+
+		observability.SetSpanAttributes(span,
+			attribute.Int64(observability.AttrJailbreakDetectionTimeMs, detectionTime))
+
 		if err != nil {
 			observability.Errorf("Error performing jailbreak analysis: %v", err)
+			observability.RecordError(span, err)
 			// Continue processing despite jailbreak analysis error
 			metrics.RecordRequestError(ctx.RequestModel, "classification_failed")
 		} else if hasJailbreak {
@@ -389,6 +443,11 @@ func (r *OpenAIRouter) performSecurityChecks(ctx *RequestContext, userContent st
 				}
 			}
+			observability.SetSpanAttributes(span,
+				attribute.Bool(observability.AttrJailbreakDetected, true),
+				attribute.String(observability.AttrJailbreakType, jailbreakType),
+				attribute.String(observability.AttrSecurityAction, "blocked"))
+
 			observability.Warnf("JAILBREAK ATTEMPT BLOCKED: %s (confidence: %.3f)", jailbreakType, confidence)
 
 			// Return immediate jailbreak violation response
@@ -402,9 +461,13 @@ func (r *OpenAIRouter) performSecurityChecks(ctx *RequestContext, userContent st
 			// Count this as a blocked request
 			metrics.RecordRequestError(ctx.RequestModel, "jailbreak_block")
 			jailbreakResponse := http.CreateJailbreakViolationResponse(jailbreakType, confidence)
+			ctx.TraceContext = spanCtx
 			return jailbreakResponse, true
 		} else {
+			observability.SetSpanAttributes(span,
+				attribute.Bool(observability.AttrJailbreakDetected, false))
 			observability.Infof("No jailbreak detected in request content")
+			ctx.TraceContext = spanCtx
 		}
 	}
@@ -425,10 +488,23 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext) (*ext_proc.ProcessingR
 	ctx.RequestQuery = requestQuery
 
 	if requestQuery != "" && r.Cache.IsEnabled() {
+		// Start cache lookup span
+		spanCtx, span := observability.StartSpan(ctx.TraceContext, observability.SpanCacheLookup)
+		defer span.End()
+
+		startTime := time.Now()
 		// Try to find a similar cached response
 		cachedResponse, found, err := r.Cache.FindSimilar(requestModel, requestQuery)
+		lookupTime := time.Since(startTime).Milliseconds()
+
+		observability.SetSpanAttributes(span,
+			attribute.String(observability.AttrCacheKey, requestQuery),
+			attribute.Bool(observability.AttrCacheHit, found),
+			attribute.Int64(observability.AttrCacheLookupTimeMs, lookupTime))
+
 		if err != nil {
 			observability.Errorf("Error searching cache: %v", err)
+			observability.RecordError(span, err)
 		} else if found {
 			// Mark this request as a cache hit
 			ctx.VSRCacheHit = true
@@ -440,8 +516,10 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext) (*ext_proc.ProcessingR
 			})
 			// Return immediate response from cache
 			response := http.CreateCacheHitResponse(cachedResponse)
+			ctx.TraceContext = spanCtx
 			return response, true
 		}
+		ctx.TraceContext = spanCtx
 	}
 
 	// Cache miss, store the request for later
@@ -482,15 +560,51 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 	}
 
 	if classificationText != "" {
+		// Start classification span
+		classifyCtx, classifySpan := observability.StartSpan(ctx.TraceContext, observability.SpanClassification)
+		classifyStart := time.Now()
+
 		// Find the most similar task description or classify, then select best model
 		matchedModel := r.classifyAndSelectBestModel(classificationText)
+		classifyTime := time.Since(classifyStart).Milliseconds()
+
+		// Get category information for the span
+		categoryName := r.findCategoryForClassification(classificationText)
+
+		observability.SetSpanAttributes(classifySpan,
+			attribute.String(observability.AttrCategoryName, categoryName),
+			attribute.String(observability.AttrClassifierType, "bert"),
+			attribute.Int64(observability.AttrClassificationTimeMs, classifyTime))
+		classifySpan.End()
+		ctx.TraceContext = classifyCtx
+
 		if matchedModel != originalModel && matchedModel != "" {
-			// Get detected PII for policy checking
+			// Start PII detection span if enabled
 			allContent := pii.ExtractAllContent(userContent, nonUserMessages)
 			if r.PIIChecker.IsPIIEnabled(matchedModel) {
+				piiCtx, piiSpan := observability.StartSpan(ctx.TraceContext, observability.SpanPIIDetection)
+				piiStart := time.Now()
+
 				observability.Infof("PII policy enabled for model %s", matchedModel)
 				detectedPII := r.Classifier.DetectPIIInContent(allContent)
+				piiTime := time.Since(piiStart).Milliseconds()
+				piiDetected := len(detectedPII) > 0
+
+				observability.SetSpanAttributes(piiSpan,
+					attribute.Bool(observability.AttrPIIDetected, piiDetected),
+					attribute.Int64(observability.AttrPIIDetectionTimeMs, piiTime))
+
+				if piiDetected {
+					// Convert detected PII to comma-separated string
+					piiTypesStr := strings.Join(detectedPII, ",")
+					observability.SetSpanAttributes(piiSpan,
+						attribute.String(observability.AttrPIITypes, piiTypesStr))
+				}
+
+				piiSpan.End()
+				ctx.TraceContext = piiCtx
+
 				// Check if the initially selected model passes PII policy
 				allowed, deniedPII, err := r.PIIChecker.CheckPolicy(matchedModel, detectedPII)
 				if err != nil {
@@ -544,6 +658,9 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 
 			observability.Infof("Routing to model: %s", matchedModel)
 
+			// Start routing decision span
+			routingCtx, routingSpan := observability.StartSpan(ctx.TraceContext, observability.SpanRoutingDecision)
+
 			// Check reasoning mode for this category using entropy-based approach
 			useReasoning, categoryName, reasoningDecision := r.getEntropyBasedReasoningModeAndCategory(userContent)
 			observability.Infof("Entropy-based reasoning decision for this query: %v on [%s] model (confidence: %.3f, reason: %s)",
@@ -552,6 +669,18 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 			effortForMetrics := r.getReasoningEffort(categoryName)
 			metrics.RecordReasoningDecision(categoryName, matchedModel, useReasoning, effortForMetrics)
 
+			// Set routing attributes on span
+			observability.SetSpanAttributes(routingSpan,
+				attribute.String(observability.AttrRoutingStrategy, "auto"),
+				attribute.String(observability.AttrRoutingReason, reasoningDecision.DecisionReason),
+				attribute.String(observability.AttrOriginalModel, originalModel),
+				attribute.String(observability.AttrSelectedModel, matchedModel),
+				attribute.Bool(observability.AttrReasoningEnabled, useReasoning),
+				attribute.String(observability.AttrReasoningEffort, effortForMetrics))
+
+			routingSpan.End()
+			ctx.TraceContext = routingCtx
+
 			// Track VSR decision information
 			ctx.VSRSelectedCategory = categoryName
 			ctx.VSRSelectedModel = matchedModel
@@ -567,15 +696,29 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 			// Update the actual model that will be used
 			actualModel = matchedModel
 
+			// Start backend selection span
+			backendCtx, backendSpan := observability.StartSpan(ctx.TraceContext, observability.SpanBackendSelection)
+
 			// Select the best endpoint for this model
 			endpointAddress, endpointFound := r.Config.SelectBestEndpointAddressForModel(matchedModel)
 			if endpointFound {
 				selectedEndpoint = endpointAddress
 				observability.Infof("Selected endpoint address: %s for model: %s", selectedEndpoint, matchedModel)
+
+				// Extract endpoint name from config
+				endpoints := r.Config.GetEndpointsForModel(matchedModel)
+				if len(endpoints) > 0 {
+					observability.SetSpanAttributes(backendSpan,
+						attribute.String(observability.AttrEndpointName, endpoints[0].Name),
+						attribute.String(observability.AttrEndpointAddress, selectedEndpoint))
+				}
 			} else {
 				observability.Warnf("No endpoint found for model %s, using fallback", matchedModel)
 			}
 
+			backendSpan.End()
+			ctx.TraceContext = backendCtx
+
 			// Modify the model in the request
 			openAIRequest.Model = openai.ChatModel(matchedModel)
 
@@ -610,14 +753,25 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 			}
 
 			if category != nil && category.SystemPrompt != "" && category.IsSystemPromptEnabled() {
+				// Start system prompt injection span
+				promptCtx, promptSpan := observability.StartSpan(ctx.TraceContext, observability.SpanSystemPromptInjection)
+
 				mode := category.GetSystemPromptMode()
 				var injected bool
 				modifiedBody, injected, err = addSystemPromptToRequestBody(modifiedBody, category.SystemPrompt, mode)
 				if err != nil {
 					observability.Errorf("Error adding system prompt to request: %v", err)
+					observability.RecordError(promptSpan, err)
 					metrics.RecordRequestError(actualModel, "serialization_error")
+					promptSpan.End()
 					return nil, status.Errorf(codes.Internal, "error adding system prompt: %v", err)
 				}
+
+				observability.SetSpanAttributes(promptSpan,
+					attribute.Bool("system_prompt.injected", injected),
+					attribute.String("system_prompt.mode", mode),
+					attribute.String(observability.AttrCategoryName, categoryName))
+
 				if injected {
 					ctx.VSRInjectedSystemPrompt = true
 					observability.Infof("Added category-specific system prompt for category: %s (mode: %s)", categoryName, mode)
@@ -625,6 +779,9 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 				// Log metadata about system prompt injection (avoid logging sensitive user data)
 				observability.Infof("System prompt injection completed for category: %s, body size: %d bytes",
 					categoryName, len(modifiedBody))
+
+				promptSpan.End()
+				ctx.TraceContext = promptCtx
 			} else if category != nil && category.SystemPrompt != "" && !category.IsSystemPromptEnabled() {
 				observability.Infof("System prompt disabled for category: %s", categoryName)
 			}
diff --git a/src/semantic-router/pkg/observability/propagation.go b/src/semantic-router/pkg/observability/propagation.go
new file mode 100644
index 00000000..a8c6a4b1
--- /dev/null
+++ b/src/semantic-router/pkg/observability/propagation.go
@@ -0,0 +1,43 @@
+package observability
+
+import (
+	"context"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/propagation"
+)
+
+// InjectTraceContext injects trace context into a map (e.g., HTTP headers)
+func InjectTraceContext(ctx context.Context, headers map[string]string) {
+	propagator := otel.GetTextMapPropagator()
+	carrier := propagation.MapCarrier(headers)
+	propagator.Inject(ctx, carrier)
+}
+
+// ExtractTraceContext extracts trace context from a map (e.g., HTTP headers)
+func ExtractTraceContext(ctx context.Context, headers map[string]string) context.Context {
+	propagator := otel.GetTextMapPropagator()
+	carrier := propagation.MapCarrier(headers)
+	return propagator.Extract(ctx, carrier)
+}
+
+// InjectTraceContextToSlice injects trace context into a slice of key-value pairs
+func InjectTraceContextToSlice(ctx context.Context) [][2]string {
+	headers := make(map[string]string)
+	InjectTraceContext(ctx, headers)
+
+	result := make([][2]string, 0, len(headers))
+	for k, v := range headers {
+		result = append(result, [2]string{k, v})
+	}
+	return result
+}
+
+// ExtractTraceContextFromSlice extracts trace context from a slice of key-value pairs
+func ExtractTraceContextFromSlice(ctx context.Context, headers [][2]string) context.Context {
+	headerMap := make(map[string]string, len(headers))
+	for _, h := range headers {
+		headerMap[h[0]] = h[1]
+	}
+	return ExtractTraceContext(ctx, headerMap)
+}
diff --git a/src/semantic-router/pkg/observability/tracing.go b/src/semantic-router/pkg/observability/tracing.go
new file mode 100644
index 00000000..b1c82c12
--- /dev/null
+++ b/src/semantic-router/pkg/observability/tracing.go
@@ -0,0 +1,249 @@
+package observability
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
+	"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
+	"go.opentelemetry.io/otel/propagation"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
+	"go.opentelemetry.io/otel/trace"
+	"google.golang.org/grpc/credentials/insecure"
+)
+
+// TracingConfig holds the tracing configuration
+type TracingConfig struct {
+	Enabled               bool
+	Provider              string
+	ExporterType          string
+	ExporterEndpoint      string
+	ExporterInsecure      bool
+	SamplingType          string
+	SamplingRate          float64
+	ServiceName           string
+	ServiceVersion        string
+	DeploymentEnvironment string
+}
+
+var (
+	tracerProvider *sdktrace.TracerProvider
+	tracer         trace.Tracer
+)
+
+// InitTracing initializes the OpenTelemetry tracing provider
+func InitTracing(ctx context.Context, cfg TracingConfig) error {
+	if !cfg.Enabled {
+		Infof("Distributed tracing is disabled")
+		return nil
+	}
+
+	// Create resource with service information
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceNameKey.String(cfg.ServiceName),
+			semconv.ServiceVersionKey.String(cfg.ServiceVersion),
+			semconv.DeploymentEnvironmentKey.String(cfg.DeploymentEnvironment),
+		),
+	)
+	if err != nil {
+		return fmt.Errorf("failed to create resource: %w", err)
+	}
+
+	// Create exporter based on configuration
+	var exporter sdktrace.SpanExporter
+	switch cfg.ExporterType {
+	case "otlp":
+		exporter, err = createOTLPExporter(ctx, cfg)
+		if err != nil {
+			return fmt.Errorf("failed to create OTLP exporter: %w", err)
+		}
+	case "stdout":
+		exporter, err = stdouttrace.New(
+			stdouttrace.WithPrettyPrint(),
+		)
+		if err != nil {
+			return fmt.Errorf("failed to create stdout exporter: %w", err)
+		}
+	default:
+		return fmt.Errorf("unsupported exporter type: %s", cfg.ExporterType)
+	}
+
+	// Create sampler based on configuration
+	var sampler sdktrace.Sampler
+	switch cfg.SamplingType {
+	case "always_on":
+		sampler = sdktrace.AlwaysSample()
+	case "always_off":
+		sampler = sdktrace.NeverSample()
+	case "probabilistic":
+		sampler = sdktrace.TraceIDRatioBased(cfg.SamplingRate)
+	default:
+		sampler = sdktrace.AlwaysSample()
+	}
+
+	// Create tracer provider
+	tracerProvider = sdktrace.NewTracerProvider(
+		sdktrace.WithResource(res),
+		sdktrace.WithBatcher(exporter),
+		sdktrace.WithSampler(sampler),
+	)
+
+	// Set global tracer provider
+	otel.SetTracerProvider(tracerProvider)
+
+	// Set global propagator for trace context propagation
+	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
+		propagation.TraceContext{},
+		propagation.Baggage{},
+	))
+
+	// Create named tracer for the router
+	tracer = tracerProvider.Tracer("semantic-router")
+
+	Infof("Distributed tracing initialized (provider: %s, exporter: %s, sampling: %s)",
+		cfg.Provider, cfg.ExporterType, cfg.SamplingType)
+
+	return nil
+}
+
+// createOTLPExporter creates an OTLP gRPC exporter
+func createOTLPExporter(ctx context.Context, cfg TracingConfig) (sdktrace.SpanExporter, error) {
+	opts := []otlptracegrpc.Option{
+		otlptracegrpc.WithEndpoint(cfg.ExporterEndpoint),
+	}
+
+	if cfg.ExporterInsecure {
+		opts = append(opts, otlptracegrpc.WithTLSCredentials(insecure.NewCredentials()))
+	}
+
+	// Create exporter with timeout context for initialization
+	// Note: We don't use WithBlock() to allow the exporter to connect asynchronously
+	// This prevents blocking on startup if the collector is temporarily unavailable
+	ctxWithTimeout, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+
+	return otlptracegrpc.New(ctxWithTimeout, opts...)
+}
+
+// ShutdownTracing gracefully shuts down the tracing provider
+func ShutdownTracing(ctx context.Context) error {
+	if tracerProvider != nil {
+		return tracerProvider.Shutdown(ctx)
+	}
+	return nil
+}
+
+// GetTracer returns the global tracer instance
+func GetTracer() trace.Tracer {
+	if tracer == nil {
+		// Return noop tracer if tracing is not initialized
+		return otel.Tracer("semantic-router")
+	}
+	return tracer
+}
+
+// StartSpan starts a new span with the given name and options
+func StartSpan(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
+	// Handle nil context by using background context
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	if tracer == nil {
+		// Return noop tracer if tracing is not initialized
+		return otel.Tracer("semantic-router").Start(ctx, spanName, opts...)
+	}
+	return tracer.Start(ctx, spanName, opts...)
+}
+
+// SetSpanAttributes sets attributes on a span if it exists
+func SetSpanAttributes(span trace.Span, attrs ...attribute.KeyValue) {
+	if span != nil {
+		span.SetAttributes(attrs...)
+	}
+}
+
+// RecordError records an error on a span if it exists
+func RecordError(span trace.Span, err error) {
+	if span != nil && err != nil {
+		span.RecordError(err)
+	}
+}
+
+// Span attribute keys following OpenInference conventions for LLM observability
+const (
+	// Request metadata
+	AttrRequestID = "request.id"
+	AttrUserID    = "user.id"
+	AttrSessionID = "session.id"
+	AttrHTTPMethod = "http.method"
+	AttrHTTPPath   = "http.path"
+
+	// Model information
+	AttrModelName     = "model.name"
+	AttrModelProvider = "model.provider"
+	AttrModelVersion  = "model.version"
+
+	// Classification
+	AttrCategoryName       = "category.name"
+	AttrCategoryConfidence = "category.confidence"
+	AttrClassifierType     = "classifier.type"
+
+	// Routing
+	AttrRoutingStrategy = "routing.strategy"
+	AttrRoutingReason   = "routing.reason"
+	AttrOriginalModel   = "routing.original_model"
+	AttrSelectedModel   = "routing.selected_model"
+	AttrEndpointName    = "endpoint.name"
+	AttrEndpointAddress = "endpoint.address"
+
+	// Security
+	AttrPIIDetected       = "pii.detected"
+	AttrPIITypes          = "pii.types"
+	AttrJailbreakDetected = "jailbreak.detected"
+	AttrJailbreakType     = "jailbreak.type"
+	AttrSecurityAction    = "security.action"
+
+	// Performance
+	AttrTokenCountPrompt     = "token.count.prompt"
+	AttrTokenCountCompletion = "token.count.completion"
+	AttrCacheHit             = "cache.hit"
+	AttrCacheKey             = "cache.key"
+
+	// Reasoning
+	AttrReasoningEnabled = "reasoning.enabled"
+	AttrReasoningEffort  = "reasoning.effort"
+	AttrReasoningFamily  = "reasoning.family"
+
+	// Tools
+	AttrToolsSelected = "tools.selected"
+	AttrToolsCount    = "tools.count"
+
+	// Processing times
+	AttrProcessingTimeMs         = "processing.time_ms"
+	AttrClassificationTimeMs     = "classification.time_ms"
+	AttrCacheLookupTimeMs        = "cache.lookup_time_ms"
+	AttrPIIDetectionTimeMs       = "pii.detection_time_ms"
+	AttrJailbreakDetectionTimeMs = "jailbreak.detection_time_ms"
+)
+
+// Span names for different operations
+const (
+	SpanRequestReceived       = "semantic_router.request.received"
+	SpanClassification        = "semantic_router.classification"
+	SpanPIIDetection          = "semantic_router.security.pii_detection"
+	SpanJailbreakDetection    = "semantic_router.security.jailbreak_detection"
+	SpanCacheLookup           = "semantic_router.cache.lookup"
+	SpanRoutingDecision       = "semantic_router.routing.decision"
+	SpanBackendSelection      = "semantic_router.backend.selection"
+	SpanUpstreamRequest       = "semantic_router.upstream.request"
+	SpanResponseProcessing    = "semantic_router.response.processing"
+	SpanToolSelection         = "semantic_router.tools.selection"
+	SpanSystemPromptInjection = "semantic_router.system_prompt.injection"
+)
diff --git a/src/semantic-router/pkg/observability/tracing_test.go b/src/semantic-router/pkg/observability/tracing_test.go
new file mode 100644
index 00000000..4141be97
--- /dev/null
+++ b/src/semantic-router/pkg/observability/tracing_test.go
@@ -0,0 +1,230 @@
+package observability
+
+import (
+	"context"
+	"testing"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/codes"
+)
+
+func TestTracingConfiguration(t *testing.T) {
+	tests := []struct {
+		name    string
+		cfg     TracingConfig
+		wantErr bool
+	}{
+		{
+			name: "disabled tracing",
+			cfg: TracingConfig{
+				Enabled: false,
+			},
+			wantErr: false,
+		},
+		{
+			name: "stdout exporter",
+			cfg: TracingConfig{
+				Enabled:               true,
+				Provider:              "opentelemetry",
+				ExporterType:          "stdout",
+				SamplingType:          "always_on",
+				ServiceName:           "test-service",
+				ServiceVersion:        "v1.0.0",
+				DeploymentEnvironment: "test",
+			},
+			wantErr: false,
+		},
+		{
+			name: "probabilistic sampling",
+			cfg: TracingConfig{
+				Enabled:               true,
+				Provider:              "opentelemetry",
+				ExporterType:          "stdout",
+				SamplingType:          "probabilistic",
+				SamplingRate:          0.5,
+				ServiceName:           "test-service",
+				ServiceVersion:        "v1.0.0",
+				DeploymentEnvironment: "test",
+			},
+			wantErr: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctx := context.Background()
+			err := InitTracing(ctx, tt.cfg)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("InitTracing() error = %v, wantErr %v", err, tt.wantErr)
+			}
+
+			// Cleanup
+			if err == nil {
+				shutdownCtx := context.Background()
+				_ = ShutdownTracing(shutdownCtx)
+			}
+		})
+	}
+}
+
+func TestSpanCreation(t *testing.T) {
+	// Initialize tracing with stdout exporter
+	ctx := context.Background()
+	cfg := TracingConfig{
+		Enabled:               true,
+		Provider:              "opentelemetry",
+		ExporterType:          "stdout",
+		SamplingType:          "always_on",
+		ServiceName:           "test-service",
+		ServiceVersion:        "v1.0.0",
+		DeploymentEnvironment: "test",
+	}
+
+	err := InitTracing(ctx, cfg)
+	if err != nil {
+		t.Fatalf("Failed to initialize tracing: %v", err)
+	}
+	defer func() {
+		shutdownCtx := context.Background()
+		_ = ShutdownTracing(shutdownCtx)
+	}()
+
+	// Test span creation
+	spanCtx, span := StartSpan(ctx, SpanRequestReceived)
+	if span == nil {
+		t.Fatal("StartSpan returned nil span")
+	}
+
+	// Test setting attributes
+	SetSpanAttributes(span,
+		attribute.String(AttrRequestID, "test-request-123"),
+		attribute.String(AttrModelName, "gpt-4"),
+	)
+
+	// Test recording error
+	testErr := context.Canceled
+	RecordError(span, testErr)
+	span.SetStatus(codes.Error, "test error")
+
+	span.End()
+
+	// Verify context was updated
+	if spanCtx == nil {
+		t.Fatal("StartSpan returned nil context")
+	}
+}
+
+func TestTraceContextPropagation(t *testing.T) {
+	// Initialize tracing
+	ctx := context.Background()
+	cfg := TracingConfig{
+		Enabled:               true,
+		Provider:              "opentelemetry",
+		ExporterType:          "stdout",
+		SamplingType:          "always_on",
+		ServiceName:           "test-service",
+		ServiceVersion:        "v1.0.0",
+		DeploymentEnvironment: "test",
+	}
+
+	err := InitTracing(ctx, cfg)
+	if err != nil {
+		t.Fatalf("Failed to initialize tracing: %v", err)
+	}
+	defer func() {
+		shutdownCtx := context.Background()
+		_ = ShutdownTracing(shutdownCtx)
+	}()
+
+	// Create a span to establish trace context
+	spanCtx, span := StartSpan(ctx, "test-span")
+	defer span.End()
+
+	// Test injection
+	headers := make(map[string]string)
+	InjectTraceContext(spanCtx, headers)
+
+	// Verify trace context was injected
+	if len(headers) == 0 {
+		t.Error("InjectTraceContext did not inject any headers")
+	}
+
+	// Test extraction
+	extractedCtx := ExtractTraceContext(ctx, headers)
+	if extractedCtx == nil {
+		t.Error("ExtractTraceContext returned nil context")
+	}
+}
+
+func TestGetTracerWhenNotInitialized(t *testing.T) {
+	// Don't initialize tracing
+	tracer := GetTracer()
+	if tracer == nil {
+		t.Error("GetTracer returned nil when not initialized")
+	}
+
+	// Should return a noop tracer that doesn't panic
+	ctx := context.Background()
+	_, span := tracer.Start(ctx, "test-span")
+	if span == nil {
+		t.Error("Noop tracer returned nil span")
+	}
+	span.End()
+}
+
+func TestStartSpanWithNilContext(t *testing.T) {
+	// Test that StartSpan handles nil context gracefully
+	// This simulates the scenario where TraceContext may not be initialized
+	ctx, span := StartSpan(nil, "test-span")
+	if span == nil {
+		t.Error("StartSpan returned nil span with nil context")
+	}
+	if ctx == nil {
+		t.Error("StartSpan returned nil context")
+	}
+	span.End()
+}
+
+func TestSpanAttributeConstants(t *testing.T) {
+	// Verify span name constants are defined
+	spanNames := []string{
+		SpanRequestReceived,
+		SpanClassification,
+		SpanPIIDetection,
+		SpanJailbreakDetection,
+		SpanCacheLookup,
+		SpanRoutingDecision,
+		SpanBackendSelection,
+		SpanUpstreamRequest,
+		SpanResponseProcessing,
+		SpanToolSelection,
+		SpanSystemPromptInjection,
+	}
+
+	for _, name := range spanNames {
+		if name == "" {
+			t.Errorf("Span name constant is empty")
+		}
+		if len(name) < 10 {
+			t.Errorf("Span name %q is too short", name)
+		}
+	}
+
+	// Verify attribute key constants are defined
+	attrKeys := []string{
+		AttrRequestID,
+		AttrModelName,
+		AttrCategoryName,
+		AttrRoutingStrategy,
+		AttrPIIDetected,
+		AttrJailbreakDetected,
+		AttrCacheHit,
+		AttrReasoningEnabled,
+	}
+
+	for _, key := range attrKeys {
+		if key == "" {
+			t.Errorf("Attribute key constant is empty")
+		}
+	}
+}
diff --git a/website/docs/tutorials/observability/distributed-tracing.md b/website/docs/tutorials/observability/distributed-tracing.md
new file mode 100644
index 00000000..a0e47612
--- /dev/null
+++ b/website/docs/tutorials/observability/distributed-tracing.md
@@ -0,0 +1,519 @@
+# Distributed Tracing with OpenTelemetry
+
+This guide explains how to configure and use distributed tracing in vLLM Semantic Router for enhanced observability and debugging capabilities.
+
+## Overview
+
+vLLM Semantic Router implements comprehensive distributed tracing using OpenTelemetry, providing fine-grained visibility into the request processing pipeline. Tracing helps you:
+
+- **Debug Production Issues**: Trace individual requests through the entire routing pipeline
+- **Optimize Performance**: Identify bottlenecks in classification, caching, and routing
+- **Monitor Security**: Track PII detection and jailbreak prevention operations
+- **Analyze Decisions**: Understand routing logic and reasoning mode selection
+- **Correlate Services**: Connect traces across the router and vLLM backends
+
+## Architecture
+
+### Trace Hierarchy
+
+A typical request trace follows this structure:
+
+```
+semantic_router.request.received [root span]
+├─ semantic_router.classification
+├─ semantic_router.security.pii_detection
+├─ semantic_router.security.jailbreak_detection
+├─ semantic_router.cache.lookup
+├─ semantic_router.routing.decision
+├─ semantic_router.backend.selection
+├─ semantic_router.system_prompt.injection
+└─ semantic_router.upstream.request
+```
+
+### Span Attributes
+
+Each span includes rich attributes following OpenInference conventions for LLM observability:
+
+**Request Metadata:**
+
+- `request.id` - Unique request identifier
+- `user.id` - User identifier (if available)
+- `http.method` - HTTP method
+- `http.path` - Request path
+
+**Model Information:**
+
+- `model.name` - Selected model name
- 
`routing.original_model` - Original requested model +- `routing.selected_model` - Model selected by router + +**Classification:** + +- `category.name` - Classified category +- `classifier.type` - Classifier implementation +- `classification.time_ms` - Classification duration + +**Security:** + +- `pii.detected` - Whether PII was found +- `pii.types` - Types of PII detected +- `jailbreak.detected` - Whether jailbreak attempt detected +- `security.action` - Action taken (blocked, allowed) + +**Routing:** + +- `routing.strategy` - Routing strategy (auto, specified) +- `routing.reason` - Reason for routing decision +- `reasoning.enabled` - Whether reasoning mode enabled +- `reasoning.effort` - Reasoning effort level + +**Performance:** + +- `cache.hit` - Cache hit/miss status +- `cache.lookup_time_ms` - Cache lookup duration +- `processing.time_ms` - Total processing time + +## Configuration + +### Basic Configuration + +Add the `observability.tracing` section to your `config.yaml`: + +```yaml +observability: + tracing: + enabled: true + provider: "opentelemetry" + exporter: + type: "stdout" # or "otlp" + endpoint: "localhost:4317" + insecure: true + sampling: + type: "always_on" # or "probabilistic" + rate: 1.0 + resource: + service_name: "vllm-semantic-router" + service_version: "v0.1.0" + deployment_environment: "production" +``` + +### Configuration Options + +#### Exporter Types + +**stdout** - Print traces to console (development) + +```yaml +exporter: + type: "stdout" +``` + +**otlp** - Export to OTLP-compatible backend (production) + +```yaml +exporter: + type: "otlp" + endpoint: "jaeger:4317" # Jaeger, Tempo, Datadog, etc. 
+ insecure: true # Use false with TLS in production +``` + +#### Sampling Strategies + +**always_on** - Sample all requests (development/debugging) + +```yaml +sampling: + type: "always_on" +``` + +**always_off** - Disable sampling (emergency performance) + +```yaml +sampling: + type: "always_off" +``` + +**probabilistic** - Sample a percentage of requests (production) + +```yaml +sampling: + type: "probabilistic" + rate: 0.1 # Sample 10% of requests +``` + +### Environment-Specific Configurations + +#### Development + +```yaml +observability: + tracing: + enabled: true + provider: "opentelemetry" + exporter: + type: "stdout" + sampling: + type: "always_on" + resource: + service_name: "vllm-semantic-router-dev" + deployment_environment: "development" +``` + +#### Production + +```yaml +observability: + tracing: + enabled: true + provider: "opentelemetry" + exporter: + type: "otlp" + endpoint: "tempo:4317" + insecure: false # Use TLS + sampling: + type: "probabilistic" + rate: 0.1 # 10% sampling + resource: + service_name: "vllm-semantic-router" + service_version: "v0.1.0" + deployment_environment: "production" +``` + +## Deployment + +### With Jaeger + +1. **Start Jaeger** (all-in-one for testing): + +```bash +docker run -d --name jaeger \ + -p 4317:4317 \ + -p 16686:16686 \ + jaegertracing/all-in-one:latest +``` + +2. **Configure Router**: + +```yaml +observability: + tracing: + enabled: true + exporter: + type: "otlp" + endpoint: "localhost:4317" + insecure: true + sampling: + type: "probabilistic" + rate: 0.1 +``` + +3. **Access Jaeger UI**: http://localhost:16686 + +### With Grafana Tempo + +1. **Configure Tempo** (tempo.yaml): + +```yaml +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +storage: + trace: + backend: local + local: + path: /tmp/tempo/traces +``` + +2. 
**Start Tempo**: + +```bash +docker run -d --name tempo \ + -p 4317:4317 \ + -p 3200:3200 \ + -v $(pwd)/tempo.yaml:/etc/tempo.yaml \ + grafana/tempo:latest \ + -config.file=/etc/tempo.yaml +``` + +3. **Configure Router**: + +```yaml +observability: + tracing: + enabled: true + exporter: + type: "otlp" + endpoint: "tempo:4317" + insecure: true +``` + +### Kubernetes Deployment + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: router-config +data: + config.yaml: | + observability: + tracing: + enabled: true + exporter: + type: "otlp" + endpoint: "jaeger-collector.observability.svc:4317" + insecure: false + sampling: + type: "probabilistic" + rate: 0.1 + resource: + service_name: "vllm-semantic-router" + deployment_environment: "production" +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: semantic-router +spec: + template: + spec: + containers: + - name: router + image: vllm-semantic-router:latest + env: + - name: CONFIG_PATH + value: /config/config.yaml + volumeMounts: + - name: config + mountPath: /config + volumes: + - name: config + configMap: + name: router-config +``` + +## Usage Examples + +### Viewing Traces + +#### Console Output (stdout exporter) + +```json +{ + "Name": "semantic_router.classification", + "SpanContext": { + "TraceID": "abc123...", + "SpanID": "def456..." + }, + "Attributes": [ + { + "Key": "category.name", + "Value": "math" + }, + { + "Key": "classification.time_ms", + "Value": 45 + } + ], + "Duration": 45000000 +} +``` + +#### Jaeger UI + +1. Navigate to http://localhost:16686 +2. Select service: `vllm-semantic-router` +3. Click "Find Traces" +4. 
View trace details and timeline + +### Analyzing Performance + +**Find slow requests:** + +``` +Service: vllm-semantic-router +Min Duration: 1s +Limit: 20 +``` + +**Analyze classification bottlenecks:** +Filter by operation: `semantic_router.classification` +Sort by duration (descending) + +**Track cache effectiveness:** +Filter by tag: `cache.hit = true` +Compare durations with cache misses + +### Debugging Issues + +**Find failed requests:** +Filter by tag: `error = true` + +**Trace specific request:** +Filter by tag: `request.id = req-abc-123` + +**Find PII violations:** +Filter by tag: `security.action = blocked` + +## Trace Context Propagation + +The router automatically propagates trace context using W3C Trace Context headers: + +**Request headers** (extracted by router): + +``` +traceparent: 00-abc123-def456-01 +tracestate: vendor=value +``` + +**Upstream headers** (injected by router): + +``` +traceparent: 00-abc123-ghi789-01 +x-gateway-destination-endpoint: endpoint1 +x-selected-model: gpt-4 +``` + +This enables end-to-end tracing from client → router → vLLM backend. + +## Performance Considerations + +### Overhead + +Tracing adds minimal overhead when properly configured: + +- **Always-on sampling**: ~1-2% latency increase +- **10% probabilistic**: ~0.1-0.2% latency increase +- **Async export**: No blocking on span export + +### Optimization Tips + +1. **Use probabilistic sampling in production** + + ```yaml + sampling: + type: "probabilistic" + rate: 0.1 # Adjust based on traffic + ``` + +2. **Adjust sampling rate dynamically** + - High traffic: 0.01-0.1 (1-10%) + - Medium traffic: 0.1-0.5 (10-50%) + - Low traffic: 0.5-1.0 (50-100%) + +3. **Use batch exporters** (default) + - Spans are batched before export + - Reduces network overhead + +4. **Monitor exporter health** + - Watch for export failures in logs + - Configure retry policies + +## Troubleshooting + +### Traces Not Appearing + +1. 
**Check tracing is enabled**: + +```yaml +observability: + tracing: + enabled: true +``` + +2. **Verify exporter endpoint**: + +```bash +# Test OTLP endpoint connectivity +telnet jaeger 4317 +``` + +3. **Check logs for errors**: + +``` +Failed to export spans: connection refused +``` + +### Missing Spans + +1. **Check sampling rate**: + +```yaml +sampling: + type: "probabilistic" + rate: 1.0 # Increase to see more traces +``` + +2. **Verify span creation in code**: + +- Spans are created at key processing points +- Check for nil context + +### High Memory Usage + +1. **Reduce sampling rate**: + +```yaml +sampling: + rate: 0.01 # 1% sampling +``` + +2. **Verify batch exporter is working**: + +- Check export interval +- Monitor queue length + +## Best Practices + +1. **Start with stdout in development** + - Easy to verify tracing works + - No external dependencies + +2. **Use probabilistic sampling in production** + - Balances visibility and performance + - Start with 10% and adjust + +3. **Set meaningful service names** + - Use environment-specific names + - Include version information + +4. **Add custom attributes for your use case** + - Customer IDs + - Deployment region + - Feature flags + +5. **Monitor exporter health** + - Track export success rate + - Alert on high failure rates + +6. **Correlate with metrics** + - Use same service name + - Cross-reference trace IDs in logs + +## Integration with vLLM Stack + +### Future Enhancements + +The tracing implementation is designed to support future integration with vLLM backends: + +1. **Trace context propagation** to vLLM +2. **Correlated spans** across router and engine +3. **End-to-end latency** analysis +4. **Token-level timing** from vLLM + +Stay tuned for updates on vLLM integration! 
+ +## References + +- [OpenTelemetry Go SDK](https://github.com/open-telemetry/opentelemetry-go) +- [OpenInference Semantic Conventions](https://github.com/Arize-ai/openinference) +- [Jaeger Documentation](https://www.jaegertracing.io/docs/) +- [Grafana Tempo](https://grafana.com/oss/tempo/) +- [W3C Trace Context](https://www.w3.org/TR/trace-context/) diff --git a/website/docs/tutorials/observability/tracing-quickstart.md b/website/docs/tutorials/observability/tracing-quickstart.md new file mode 100644 index 00000000..ffe88586 --- /dev/null +++ b/website/docs/tutorials/observability/tracing-quickstart.md @@ -0,0 +1,115 @@ +# Quick Start: Distributed Tracing + +Get started with distributed tracing in 5 minutes. + +## Step 1: Enable Tracing + +Edit your `config.yaml`: + +```yaml +observability: + tracing: + enabled: true + provider: "opentelemetry" + exporter: + type: "stdout" + sampling: + type: "always_on" + resource: + service_name: "vllm-semantic-router" + deployment_environment: "development" +``` + +## Step 2: Start the Router + +```bash +./semantic-router --config config.yaml +``` + +## Step 3: Send a Test Request + +```bash +curl -X POST http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "auto", + "messages": [{"role": "user", "content": "What is 2+2?"}] + }' +``` + +## Step 4: View Traces + +Check your console output for JSON trace spans: + +```json +{ + "Name": "semantic_router.request.received", + "Attributes": [ + {"Key": "request.id", "Value": "req-123"}, + {"Key": "http.method", "Value": "POST"} + ] +} +``` + +## What's Next? + +### Production Deployment with Jaeger + +1. **Start Jaeger**: + + ```bash + docker run -d -p 4317:4317 -p 16686:16686 \ + jaegertracing/all-in-one:latest + ``` + +2. **Update config.yaml**: + + ```yaml + observability: + tracing: + enabled: true + exporter: + type: "otlp" + endpoint: "localhost:4317" + insecure: true + sampling: + type: "probabilistic" + rate: 0.1 + ``` + +3. 
**View traces**: http://localhost:16686 + +### Key Metrics to Monitor + +- **Classification Time**: `classification.time_ms` attribute +- **Cache Hit Rate**: Filter by `cache.hit = true` +- **Security Blocks**: Filter by `security.action = blocked` +- **Routing Decisions**: `routing.strategy` and `routing.reason` attributes + +### Common Use Cases + +**Find slow requests:** + +``` +Min Duration: 1s +Service: vllm-semantic-router +``` + +**Debug specific request:** + +``` +Tags: request.id = req-abc-123 +``` + +**Analyze classification performance:** + +``` +Operation: semantic_router.classification +Sort by: Duration (desc) +``` + +## Learn More + +- [Full Distributed Tracing Guide](./distributed-tracing.md) +- [Configuration Reference](../../installation/configuration.md) +- [Observability Overview](./observability.md)