diff --git a/website/docs/tutorials/observability/observability.md b/website/docs/tutorials/observability/metrics.md similarity index 99% rename from website/docs/tutorials/observability/observability.md rename to website/docs/tutorials/observability/metrics.md index af523e6d..913d662b 100644 --- a/website/docs/tutorials/observability/observability.md +++ b/website/docs/tutorials/observability/metrics.md @@ -1,4 +1,4 @@ -# Observability +# Metrics & Monitoring Metrics collection and visualization for Semantic Router using Prometheus and Grafana. diff --git a/website/docs/tutorials/observability/tracing-quickstart.md b/website/docs/tutorials/observability/tracing-quickstart.md deleted file mode 100644 index ffe88586..00000000 --- a/website/docs/tutorials/observability/tracing-quickstart.md +++ /dev/null @@ -1,115 +0,0 @@ -# Quick Start: Distributed Tracing - -Get started with distributed tracing in 5 minutes. - -## Step 1: Enable Tracing - -Edit your `config.yaml`: - -```yaml -observability: - tracing: - enabled: true - provider: "opentelemetry" - exporter: - type: "stdout" - sampling: - type: "always_on" - resource: - service_name: "vllm-semantic-router" - deployment_environment: "development" -``` - -## Step 2: Start the Router - -```bash -./semantic-router --config config.yaml -``` - -## Step 3: Send a Test Request - -```bash -curl -X POST http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "auto", - "messages": [{"role": "user", "content": "What is 2+2?"}] - }' -``` - -## Step 4: View Traces - -Check your console output for JSON trace spans: - -```json -{ - "Name": "semantic_router.request.received", - "Attributes": [ - {"Key": "request.id", "Value": "req-123"}, - {"Key": "http.method", "Value": "POST"} - ] -} -``` - -## What's Next? - -### Production Deployment with Jaeger - -1. **Start Jaeger**: - - ```bash - docker run -d -p 4317:4317 -p 16686:16686 \ - jaegertracing/all-in-one:latest - ``` - -2. **Update config.yaml**: - - ```yaml - observability: - tracing: - enabled: true - exporter: - type: "otlp" - endpoint: "localhost:4317" - insecure: true - sampling: - type: "probabilistic" - rate: 0.1 - ``` - -3. **View traces**: http://localhost:16686 - -### Key Metrics to Monitor - -- **Classification Time**: `classification.time_ms` attribute -- **Cache Hit Rate**: Filter by `cache.hit = true` -- **Security Blocks**: Filter by `security.action = blocked` -- **Routing Decisions**: `routing.strategy` and `routing.reason` attributes - -### Common Use Cases - -**Find slow requests:** - -``` -Min Duration: 1s -Service: vllm-semantic-router -``` - -**Debug specific request:** - -``` -Tags: request.id = req-abc-123 -``` - -**Analyze classification performance:** - -``` -Operation: semantic_router.classification -Sort by: Duration (desc) -``` - -## Learn More - -- [Full Distributed Tracing Guide](./distributed-tracing.md) -- [Configuration Reference](../../installation/configuration.md) -- [Observability Overview](./observability.md) diff --git a/website/docs/tutorials/semantic-cache/in-memory-cache.md b/website/docs/tutorials/semantic-cache/in-memory-cache.md index 2bc2291c..e06d02ea 100644 --- a/website/docs/tutorials/semantic-cache/in-memory-cache.md +++ b/website/docs/tutorials/semantic-cache/in-memory-cache.md @@ -162,4 +162,4 @@ The in-memory cache automatically manages memory through: - **[Milvus Cache](./milvus-cache.md)** - Set up persistent, distributed caching - **[Cache Overview](./overview.md)** - Learn about semantic caching concepts -- **[Observability](../observability/observability.md)** - Monitor cache performance +- **[Observability](../observability/overview.md)** - Monitor cache performance diff --git a/website/docs/tutorials/semantic-cache/milvus-cache.md b/website/docs/tutorials/semantic-cache/milvus-cache.md index d6ea9c57..5b1ff7cf 100644 --- a/website/docs/tutorials/semantic-cache/milvus-cache.md +++ b/website/docs/tutorials/semantic-cache/milvus-cache.md @@ -145,5 +145,5 @@ curl -X POST http://localhost:8080/v1/chat/completions \ ## Next Steps - **[In-Memory Cache](./in-memory-cache.md)** - Compare with in-memory caching -- **[Cache Overview](./overview.md)** - Learn semantic caching concepts -- **[Observability](../observability/observability.md)** - Monitor Milvus performance +- **[Cache Overview](./overview.md)** - Learn semantic caching concepts +- **[Observability](../observability/overview.md)** - Monitor Milvus performance diff --git a/website/sidebars.ts b/website/sidebars.ts index a67f9670..67229a71 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -87,7 +87,9 @@ const sidebars: SidebarsConfig = { label: 'Observability', items: [ 'tutorials/observability/overview', - 'tutorials/observability/observability', + 'tutorials/observability/metrics', + 'tutorials/observability/distributed-tracing', + 'tutorials/observability/open-webui-integration', ], }, ],