diff --git a/deploy/docker-compose.tracing.yaml b/deploy/docker-compose.tracing.yaml deleted file mode 100644 index 9522221f..00000000 --- a/deploy/docker-compose.tracing.yaml +++ /dev/null @@ -1,55 +0,0 @@ -version: '3.8' - -services: - # Jaeger all-in-one for distributed tracing - jaeger: - image: jaegertracing/all-in-one:latest - container_name: jaeger - ports: - - "4317:4317" # OTLP gRPC - - "4318:4318" # OTLP HTTP - - "16686:16686" # Jaeger UI - - "14268:14268" # Jaeger collector - environment: - - COLLECTOR_OTLP_ENABLED=true - networks: - - router-network - - # Semantic Router with tracing enabled - semantic-router: - image: vllm-semantic-router:latest - container_name: semantic-router - depends_on: - - jaeger - ports: - - "50051:50051" # gRPC ExtProc - - "8080:8080" # Classification API - - "9190:9190" # Metrics - volumes: - - ./config:/config - environment: - - CONFIG_PATH=/config/config.tracing.yaml - networks: - - router-network - - # Grafana for visualization - grafana: - image: grafana/grafana:latest - container_name: grafana - ports: - - "3000:3000" - environment: - - GF_AUTH_ANONYMOUS_ENABLED=true - - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin - volumes: - - ./grafana/provisioning:/etc/grafana/provisioning - - grafana-storage:/var/lib/grafana - networks: - - router-network - -networks: - router-network: - driver: bridge - -volumes: - grafana-storage: diff --git a/deploy/docker-compose/README.md b/deploy/docker-compose/README.md new file mode 100644 index 00000000..3a200167 --- /dev/null +++ b/deploy/docker-compose/README.md @@ -0,0 +1,42 @@ +# Main Runtime Compose Stack + +This directory contains the primary `docker-compose.yml` used to run the semantic-router stack (router + envoy + optional mock-vllm + observability). + +## Path Layout +Because this file lives under `deploy/docker-compose/`, all relative paths to repository resources go two levels up (../../) back to repo root. 
+ +Example mappings: + +- `../../config` -> mounts to `/app/config` inside containers +- `../../models` -> shared model files +- `../../tools/observability/...` -> Prometheus / Grafana provisioning assets + +## Profiles + +- `testing` : enables `mock-vllm` and `llm-katan` +- `llm-katan` : only `llm-katan` + +## Common Commands + +```bash +# Bring up core stack +docker compose -f deploy/docker-compose/docker-compose.yml up --build + +# With testing profile (adds mock-vllm & llm-katan) +docker compose -f deploy/docker-compose/docker-compose.yml --profile testing up --build + +# Tear down +docker compose -f deploy/docker-compose/docker-compose.yml down +``` + +## Overrides +You can place a `docker-compose.override.yml` at repo root and combine: + +```bash +docker compose -f deploy/docker-compose/docker-compose.yml -f docker-compose.override.yml up -d +``` + +## Related Stacks + +- Local observability only: `tools/observability/docker-compose.obs.yml` +- Tracing stack: `tools/tracing/docker-compose.tracing.yaml` diff --git a/docker-compose.yml b/deploy/docker-compose/docker-compose.yml similarity index 81% rename from docker-compose.yml rename to deploy/docker-compose/docker-compose.yml index 8b6990cc..c78c22af 100644 --- a/docker-compose.yml +++ b/deploy/docker-compose/docker-compose.yml @@ -3,14 +3,14 @@ services: # Semantic Router External Processor Service semantic-router: build: - context: . 
+ context: ../../ dockerfile: Dockerfile.extproc container_name: semantic-router ports: - "50051:50051" volumes: - - ./config:/app/config:ro - - ./models:/app/models:ro + - ../../config:/app/config:ro + - ../../models:/app/models:ro environment: - LD_LIBRARY_PATH=/app/lib - CONFIG_FILE=${CONFIG_FILE:-/app/config/config.yaml} @@ -31,7 +31,7 @@ services: - "8801:8801" # Main proxy port - "19000:19000" # Admin interface volumes: - - ./config/envoy-docker.yaml:/etc/envoy/envoy.yaml:ro + - ../../config/envoy-docker.yaml:/etc/envoy/envoy.yaml:ro command: ["/usr/local/bin/envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "ext_proc:trace,router:trace,http:trace"] depends_on: semantic-router: @@ -48,7 +48,7 @@ services: # Mock vLLM service for testing profile mock-vllm: build: - context: ./tools/mock-vllm + context: ../../tools/mock-vllm dockerfile: Dockerfile container_name: mock-vllm profiles: ["testing"] @@ -69,7 +69,7 @@ services: image: prom/prometheus:v2.53.0 container_name: prometheus volumes: - - ./tools/observability/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + - ../../tools/observability/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro - prometheus-data:/prometheus command: - --config.file=/etc/prometheus/prometheus.yaml @@ -91,9 +91,9 @@ services: ports: - "3000:3000" volumes: - - ./tools/observability/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro - - ./tools/observability/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro - - ./tools/observability/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro + - ../../tools/observability/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro + - ../../tools/observability/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro + - ../../tools/observability/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro - 
grafana-data:/var/lib/grafana networks: - semantic-network @@ -103,7 +103,7 @@ services: # LLM Katan service for testing llm-katan: build: - context: ./e2e-tests/llm-katan + context: ../../e2e-tests/llm-katan dockerfile: Dockerfile container_name: llm-katan profiles: ["testing", "llm-katan"] diff --git a/deploy/llm-router-dashboard.json b/deploy/llm-router-dashboard.json deleted file mode 100644 index ff136b6e..00000000 --- a/deploy/llm-router-dashboard.json +++ /dev/null @@ -1,1238 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 18, - "links": [], - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 4, - "options": { - "displayMode": "gradient", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "maxVizHeight": 300, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "auto", - "valueMode": "color", - "text": { - "valueSize": 24 - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": 
false, - "editorMode": "code", - "expr": "sum by(category) (llm_category_classifications_count)", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": true, - "legendFormat": "{{category}}", - "range": false, - "refId": "A", - "useBackend": false - } - ], - "title": "Prompt Category", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Tokens/sec", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "tps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 2, - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(rate(llm_model_completion_tokens_total[5m])) by (model)", - "legendFormat": "Completion Tokens {{model}}", - "range": true, - "refId": "A" - } - ], - "title": "Token Usage Rate by Model", - "type": "timeseries" - }, - { - 
"datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Routes/sec", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 3, - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(rate(llm_model_routing_modifications_total[5m])) by (source_model, target_model)", - "format": "time_series", - "legendFormat": "{{source_model}} -> {{target_model}}", - "range": true, - "refId": "A" - } - ], - "title": "Model Routing Rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Seconds", - 
"axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 1, - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", - "legendFormat": "p95 {{model}}", - "range": true, - "refId": "A" - } - ], - "title": "Model Completion Latency (p95)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Seconds", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "id": 5, - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum(rate(llm_model_ttft_seconds_bucket[5m])) by (le, model))", - "legendFormat": "TTFT p95 {{model}}", - "range": true, - "refId": "A" - } - ], - "title": "TTFT (p95) by Model", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Seconds per token", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", 
- "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum(rate(llm_model_tpot_seconds_bucket[5m])) by (le, model))", - "legendFormat": "TPOT p95 {{model}}", - "range": true, - "refId": "A" - } - ], - "title": "TPOT (p95) by Model (sec/token)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Requests/sec", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - 
"showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(rate(llm_reasoning_decisions_total{enabled=\"true\"}[5m])) by (model, effort)", - "legendFormat": "Reasoning Enabled: {{model}} ({{effort}})", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(rate(llm_reasoning_decisions_total{enabled=\"false\"}[5m])) by (model)", - "legendFormat": "Reasoning Disabled: {{model}}", - "range": true, - "refId": "B" - } - ], - "title": "Reasoning Rate by Model", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Cost", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "currencyUSD" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - 
"tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(rate(llm_model_cost_total{currency=\"USD\"}[5m])) by (model)", - "legendFormat": "Cost/sec: {{model}}", - "range": true, - "refId": "A" - } - ], - "title": "Model Cost Rate (USD/sec)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Errors/sec", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": 
"sum(rate(llm_request_errors_total{reason=\"pii_policy_denied\"}[5m])) by (model)", - "legendFormat": "PII Policy Denied: {{model}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(rate(llm_request_errors_total{reason=\"jailbreak_block\"}[5m])) by (model)", - "legendFormat": "Jailbreak Block: {{model}}", - "range": true, - "refId": "B" - } - ], - "title": "Refusal Rates by Model", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 0.01 - }, - { - "color": "red", - "value": 0.05 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 10, - "options": { - "displayMode": "gradient", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "maxVizHeight": 300, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "auto", - "valueMode": "color" - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(rate(llm_request_errors_total{reason=~\"pii_policy_denied|jailbreak_block\"}[5m])) by (model) / sum(rate(llm_model_requests_total[5m])) by (model)", - "legendFormat": "{{model}}", - "range": true, - "refId": "A" - } - ], - "title": "Refusal Rate Percentage by Model", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "currencyUSD" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 11, - "options": { - "displayMode": "gradient", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "maxVizHeight": 300, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "auto", - "valueMode": "color" - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(llm_model_cost_total{currency=\"USD\"}) by (model)", - "legendFormat": "{{model}}", - "range": true, - "refId": "A" - } - ], - "title": "Total Cost by Model (USD)", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Seconds", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] 
- }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 12, - "options": { - "legend": { - "calcs": [ - "mean", - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.5.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.50, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", - "legendFormat": "p50 {{model}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.90, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", - "legendFormat": "p90 {{model}}", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", - "legendFormat": "p99 {{model}}", - "range": true, - "refId": "C" - } - ], - "title": "Model Completion Latency (p50/p90/p99)", - "type": "timeseries" - } - ], - "preload": false, - "refresh": "10s", - "schemaVersion": 40, - "tags": [ - "llm-router" - ], - "templating": { - "list": [ - { - "current": { - "text": "prometheus", - "value": "prometheus" - }, - "includeAll": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-5m", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "LLM Router Metrics", - "uid": "llm-router-metrics", - "version": 14, - "weekStart": "" -} \ No newline at end of file diff --git a/deploy/tracing/README.md b/deploy/tracing/README.md 
deleted file mode 100644 index 51927f30..00000000 --- a/deploy/tracing/README.md +++ /dev/null @@ -1,155 +0,0 @@ -# Distributed Tracing Deployment Example - -This directory contains an example deployment configuration for testing distributed tracing with Jaeger. - -## Quick Start - -1. **Start the services**: - -```bash -docker-compose -f ../docker-compose.tracing.yaml up -d -``` - -2. **Access the UIs**: - -- Jaeger UI: http://localhost:16686 -- Grafana: http://localhost:3000 -- Router API: http://localhost:8080 - -3. **Send test requests**: - -```bash -# Example request -curl -X POST http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "auto", - "messages": [{"role": "user", "content": "What is 2+2?"}] - }' -``` - -4. **View traces in Jaeger**: - -- Navigate to http://localhost:16686 -- Select service: `vllm-semantic-router` -- Click "Find Traces" - -## Configuration - -The router is configured with: - -```yaml -observability: - tracing: - enabled: true - provider: "opentelemetry" - exporter: - type: "otlp" - endpoint: "jaeger:4317" - insecure: true - sampling: - type: "always_on" - resource: - service_name: "vllm-semantic-router" -``` - -## Services - -### Jaeger - -- **OTLP gRPC**: Port 4317 -- **OTLP HTTP**: Port 4318 -- **Jaeger UI**: Port 16686 -- **Collector**: Port 14268 - -### Semantic Router - -- **gRPC ExtProc**: Port 50051 -- **Classification API**: Port 8080 -- **Metrics**: Port 9190 - -### Grafana - -- **Web UI**: Port 3000 -- Default credentials: admin/admin -- Pre-configured with Jaeger data source - -## Trace Examples - -### Request Flow - -``` -semantic_router.request.received [2ms] -├─ semantic_router.classification [45ms] -│ └─ category: math, confidence: 0.95 -├─ semantic_router.security.jailbreak_detection [12ms] -│ └─ jailbreak.detected: false -├─ semantic_router.cache.lookup [3ms] -│ └─ cache.hit: false -├─ semantic_router.routing.decision [5ms] -│ └─ selected_model: gpt-4, reasoning: true 
-└─ semantic_router.backend.selection [2ms] - └─ endpoint: endpoint1 -``` - -### Key Attributes - -- `request.id`: Unique request identifier -- `category.name`: Classified category -- `routing.selected_model`: Selected model -- `reasoning.enabled`: Reasoning mode -- `cache.hit`: Cache hit status - -## Stopping Services - -```bash -docker-compose -f ../docker-compose.tracing.yaml down -``` - -To remove volumes: - -```bash -docker-compose -f ../docker-compose.tracing.yaml down -v -``` - -## Troubleshooting - -### Traces not appearing - -1. Check Jaeger is running: - -```bash -curl http://localhost:16686 -``` - -2. Verify router can connect to Jaeger: - -```bash -docker logs semantic-router | grep -i tracing -``` - -3. Check for initialization message: - -``` -Distributed tracing initialized (provider: opentelemetry, exporter: otlp) -``` - -### Router fails to start - -1. Check configuration: - -```bash -docker logs semantic-router -``` - -2. Verify Jaeger is ready: - -```bash -docker logs jaeger -``` - -## Next Steps - -- [Full Tracing Documentation](../../website/docs/tutorials/observability/distributed-tracing.md) -- [Quick Start Guide](../../website/docs/tutorials/observability/tracing-quickstart.md) -- [Configuration Reference](../../config/config.production.yaml) diff --git a/tools/make/docker.mk b/tools/make/docker.mk index 8c1ea494..e698097f 100644 --- a/tools/make/docker.mk +++ b/tools/make/docker.mk @@ -6,6 +6,12 @@ DOCKER_REGISTRY ?= ghcr.io/vllm-project/semantic-router DOCKER_TAG ?= latest +# Default docker compose environment +# Point Compose to the relocated main stack by default; override by exporting COMPOSE_FILE +export COMPOSE_FILE ?= deploy/docker-compose/docker-compose.yml +# Keep a stable project name so network/volume names are predictable across runs +export COMPOSE_PROJECT_NAME ?= semantic-router + # Build all Docker images docker-build-all: docker-build-extproc docker-build-llm-katan docker-build-precommit @$(LOG_TARGET) @@ -79,21 +85,35 @@ 
docker-push-llm-katan: @echo "Pushing llm-katan Docker image..." @$(CONTAINER_RUNTIME) push $(DOCKER_REGISTRY)/llm-katan:$(DOCKER_TAG) -# Docker compose shortcuts +# Docker compose build flag logic +# Usage: make docker-compose-up REBUILD=1 (forces image rebuild) +BUILD_FLAG=$(if $(REBUILD),--build,) + +# Docker compose shortcuts (no rebuild by default) docker-compose-up: @$(LOG_TARGET) - @echo "Starting services with docker-compose..." - @docker compose up --build + @echo "Starting services with docker-compose (REBUILD=$(REBUILD))..." + @docker compose up -d $(BUILD_FLAG) docker-compose-up-testing: @$(LOG_TARGET) - @echo "Starting services with testing profile..." - @docker compose --profile testing up --build + @echo "Starting services with testing profile (REBUILD=$(REBUILD))..." + @docker compose --profile testing up -d $(BUILD_FLAG) docker-compose-up-llm-katan: @$(LOG_TARGET) - @echo "Starting services with llm-katan profile..." - @docker compose --profile llm-katan up --build + @echo "Starting services with llm-katan profile (REBUILD=$(REBUILD))..." 
+ @docker compose --profile llm-katan up -d $(BUILD_FLAG) + +# Explicit rebuild targets for convenience +docker-compose-rebuild: REBUILD=1 +docker-compose-rebuild: docker-compose-up + +docker-compose-rebuild-testing: REBUILD=1 +docker-compose-rebuild-testing: docker-compose-up-testing + +docker-compose-rebuild-llm-katan: REBUILD=1 +docker-compose-rebuild-llm-katan: docker-compose-up-llm-katan docker-compose-down: @$(LOG_TARGET) @@ -111,10 +131,13 @@ docker-help: @echo " docker-run-llm-katan - Run llm-katan Docker image locally" @echo " docker-run-llm-katan-custom SERVED_NAME=name - Run with custom served model name" @echo " docker-clean - Clean up Docker images" - @echo " docker-compose-up - Start docker-compose services" - @echo " docker-compose-up-testing - Start with testing profile" - @echo " docker-compose-up-llm-katan - Start with llm-katan profile" - @echo " docker-compose-down - Stop docker-compose services" + @echo " docker-compose-up - Start services (add REBUILD=1 to rebuild)" + @echo " docker-compose-up-testing - Start with testing profile (REBUILD=1 optional)" + @echo " docker-compose-up-llm-katan - Start with llm-katan profile (REBUILD=1 optional)" + @echo " docker-compose-rebuild - Force rebuild then start" + @echo " docker-compose-rebuild-testing - Force rebuild (testing profile)" + @echo " docker-compose-rebuild-llm-katan - Force rebuild (llm-katan profile)" + @echo " docker-compose-down - Stop docker-compose services" @echo "" @echo "Environment Variables:" @echo " DOCKER_REGISTRY - Docker registry (default: ghcr.io/vllm-project/semantic-router)" diff --git a/tools/make/observability.mk b/tools/make/observability.mk index 5156deff..5778ecef 100644 --- a/tools/make/observability.mk +++ b/tools/make/observability.mk @@ -35,15 +35,15 @@ open-observability: ## o11y-logs: Show logs from observability containers o11y-logs: - @docker compose -f docker-compose.obs.yml logs -f 2>/dev/null || docker compose logs prometheus grafana -f + @docker compose -f 
tools/observability/docker-compose.obs.yml logs -f 2>/dev/null || docker compose -f deploy/docker-compose/docker-compose.yml logs prometheus grafana -f ## o11y-status: Check status of observability containers o11y-status: @echo "==> Local mode:" - @docker compose -f docker-compose.obs.yml ps 2>/dev/null || echo " Not running" + @docker compose -f tools/observability/docker-compose.obs.yml ps 2>/dev/null || echo " Not running" @echo "" @echo "==> Compose mode:" - @docker compose ps prometheus grafana 2>/dev/null || echo " Not running" + @docker compose -f deploy/docker-compose/docker-compose.yml ps prometheus grafana 2>/dev/null || echo " Not running" ## o11y-clean: Remove observability data volumes o11y-clean: diff --git a/tools/observability/README.md b/tools/observability/README.md index 8635a238..8cf302b7 100644 --- a/tools/observability/README.md +++ b/tools/observability/README.md @@ -28,3 +28,17 @@ make o11y-compose - Prometheus: http://localhost:9090 - Grafana: http://localhost:3000 (admin/admin) + +## Tracing Stack (Jaeger) + +For distributed tracing (Jaeger + a tracing-enabled router instance), see: + +`tools/tracing/docker-compose.tracing.yaml` + +Start it independently: + +```bash +make docker-compose-tracing-up +``` + +Then view Jaeger at http://localhost:16686 and (optional) tracing router API at http://localhost:8081. 
diff --git a/docker-compose.obs.yml b/tools/observability/docker-compose.obs.yml similarity index 66% rename from docker-compose.obs.yml rename to tools/observability/docker-compose.obs.yml index 35bde379..b159a163 100644 --- a/docker-compose.obs.yml +++ b/tools/observability/docker-compose.obs.yml @@ -1,7 +1,7 @@ # Local observability stack for monitoring semantic-router running on host # -# Usage: make obs-local -# Or: docker compose -f docker-compose.obs.yml up +# Usage: docker compose -f tools/observability/docker-compose.obs.yml up +# Or via Make: make o11y-local # # This provides Prometheus and Grafana in Docker with network_mode: host # to scrape metrics from router running natively on localhost:9190 @@ -14,7 +14,7 @@ services: container_name: prometheus-local network_mode: host volumes: - - ./tools/observability/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml:ro - prometheus-local-data:/prometheus command: - '--config.file=/etc/prometheus/prometheus.yaml' @@ -28,9 +28,9 @@ services: container_name: grafana-local network_mode: host volumes: - - ./tools/observability/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro - - ./tools/observability/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro - - ./tools/observability/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro + - ./grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro + - ./grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro + - ./llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro - grafana-local-data:/var/lib/grafana environment: - PROMETHEUS_URL=localhost:9090 diff --git a/tools/observability/scripts/start-observability.sh b/tools/observability/scripts/start-observability.sh index 0d070741..d3c8d474 100755 --- 
a/tools/observability/scripts/start-observability.sh +++ b/tools/observability/scripts/start-observability.sh @@ -48,11 +48,11 @@ MODE="${1:-local}" case "${MODE}" in local) log_info "Starting observability in LOCAL mode (router on host, observability in Docker)" - COMPOSE_FILE="${PROJECT_ROOT}/docker-compose.obs.yml" + COMPOSE_FILE="${PROJECT_ROOT}/tools/observability/docker-compose.obs.yml" ;; compose) log_info "Starting observability in COMPOSE mode (all services in Docker)" - COMPOSE_FILE="${PROJECT_ROOT}/docker-compose.yml" + COMPOSE_FILE="${PROJECT_ROOT}/deploy/docker-compose/docker-compose.yml" ;; *) log_error "Invalid mode: ${MODE}" diff --git a/tools/observability/scripts/stop-observability.sh b/tools/observability/scripts/stop-observability.sh index fa491589..23e6b500 100755 --- a/tools/observability/scripts/stop-observability.sh +++ b/tools/observability/scripts/stop-observability.sh @@ -61,7 +61,7 @@ if any_container_exists "prometheus-local" "grafana-local"; then fi if [ "${LOCAL_MODE_RUNNING}" = true ]; then log_info "Stopping local mode containers..." - docker compose -f "${PROJECT_ROOT}/docker-compose.obs.yml" down + docker compose -f "${PROJECT_ROOT}/tools/observability/docker-compose.obs.yml" down fi # Also stop compose mode if running as part of main stack @@ -77,7 +77,7 @@ fi if [ "${COMPOSE_O11Y_RUNNING}" = true ] && [ "${ROUTER_RUNNING}" = false ]; then log_warn "Observability containers from main stack are running" - log_info "Use 'docker compose down' to stop the full stack" + log_info "Use 'docker compose -f deploy/docker-compose/docker-compose.yml down' to stop the full stack" fi echo "" diff --git a/tools/tracing/README.md b/tools/tracing/README.md new file mode 100644 index 00000000..366853b4 --- /dev/null +++ b/tools/tracing/README.md @@ -0,0 +1,67 @@ +# Tracing (Local Development Stack) + +This directory provides a local Jaeger + tracing-enabled semantic-router stack for development, debugging, and demonstration. + +## Why here? 
+`tools/tracing` groups this with other local-only utilities (see `tools/observability` for the metrics stack). Production deployments should rely on manifests in `deploy/kubernetes` / `openshift` instead of this all-in-one Compose stack. + +## Quick Start + +```bash +docker compose -f tools/tracing/docker-compose.tracing.yaml up -d +``` + +## Access + +- Jaeger UI: http://localhost:16686 +- Tracing Router API: http://localhost:8081 +- Metrics (tracing instance): http://localhost:9191/metrics + +## Send a Test Request + +```bash +curl -X POST http://localhost:8081/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "auto", + "messages": [{"role": "user", "content": "What is 2+2?"}] + }' +``` + +## View Traces + +1. Open Jaeger UI +2. Select service: `vllm-semantic-router` (or `semantic-router` depending on OTEL resource config) +3. Click "Find Traces" + +## Stopping + +```bash +docker compose -f tools/tracing/docker-compose.tracing.yaml down +``` + +To also remove volumes: + +```bash +docker compose -f tools/tracing/docker-compose.tracing.yaml down -v +``` + +## Environment Variables + +| Variable | Purpose | +|----------|---------| +| OTEL_EXPORTER_OTLP_ENDPOINT | Where spans are exported (gRPC) | +| OTEL_SERVICE_NAME | Logical service name in traces | + +## Relationship with Metrics Stack +If you also want Prometheus/Grafana metrics: + +```bash +make o11y-local # or o11y-compose for full docker mode +``` + +## Next Steps + +- Distributed tracing docs: `website/docs/tutorials/observability/distributed-tracing.md` +- Tracing quickstart: `website/docs/tutorials/observability/tracing-quickstart.md` +- Router config examples: `config/` diff --git a/tools/tracing/docker-compose.tracing.yaml b/tools/tracing/docker-compose.tracing.yaml new file mode 100644 index 00000000..749a2707 --- /dev/null +++ b/tools/tracing/docker-compose.tracing.yaml @@ -0,0 +1,54 @@ +version: '3.8' + +services: + jaeger: + image: jaegertracing/all-in-one:latest + container_name: 
jaeger + ports: + - "4317:4317" + - "4318:4318" + - "16686:16686" + - "14268:14268" + environment: + - COLLECTOR_OTLP_ENABLED=true + networks: + - tracing-network + + semantic-router: + build: + context: ../../ + dockerfile: Dockerfile.extproc + image: semantic-router-tracing:latest + container_name: semantic-router-tracing + depends_on: + - jaeger + ports: + - "50052:50051" + - "8081:8080" + - "9191:9190" + volumes: + - ../../config:/app/config:ro + - ../../models:/app/models:ro + environment: + - LD_LIBRARY_PATH=/app/lib + - CONFIG_FILE=/app/config/config.tracing.yaml + - OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4317 + - OTEL_SERVICE_NAME=semantic-router + networks: + - tracing-network + + grafana: + image: grafana/grafana:latest + container_name: grafana-tracing + profiles: ["grafana"] + ports: + - "33000:3000" + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + networks: + - tracing-network + +networks: + tracing-network: + driver: bridge diff --git a/website/docs/troubleshooting/container-connectivity.md b/website/docs/troubleshooting/container-connectivity.md index 3d3681af..456123a7 100644 --- a/website/docs/troubleshooting/container-connectivity.md +++ b/website/docs/troubleshooting/container-connectivity.md @@ -112,7 +112,7 @@ Fix - Map the needed ports with `ports:`. -Example docker-compose.yml snippet +Example docker-compose.yml snippet (from `deploy/docker-compose/docker-compose.yml` after relocation) ```yaml services: diff --git a/website/docs/troubleshooting/network-tips.md b/website/docs/troubleshooting/network-tips.md index 4820cc31..c4a29bce 100644 --- a/website/docs/troubleshooting/network-tips.md +++ b/website/docs/troubleshooting/network-tips.md @@ -39,11 +39,11 @@ The router will download embedding models on first run unless you provide them l model_id: /app/models/all-MiniLM-L12-v2 ``` -3) No extra env is required. `docker-compose.yml` already mounts `./models:/app/models:ro`. +3) No extra env is required. 
`deploy/docker-compose/docker-compose.yml` already mounts `../../models:/app/models:ro`. ### Option B — Use HF cache + mirror -Create a compose override to persist cache and use a regional mirror (example below uses a China mirror). Save as `docker-compose.override.yml` in the repo root: +Create a compose override to persist cache and use a regional mirror (example below uses a China mirror). Save as `docker-compose.override.yml` in the repo root (Compose merges it with `deploy/docker-compose/docker-compose.yml` only when you pass both files with `-f`; relative paths in the override resolve against the directory of the first `-f` file): ```yaml services: @@ -164,14 +164,14 @@ services: With the overrides in place, build and run normally (Compose will auto-merge): ```bash -# Build all images with overrides -docker compose -f docker-compose.yml -f docker-compose.override.yml build +# Build all images with overrides (explicitly reference the relocated compose file) +docker compose -f deploy/docker-compose/docker-compose.yml -f docker-compose.override.yml build # Run router + envoy -docker compose -f docker-compose.yml -f docker-compose.override.yml up -d +docker compose -f deploy/docker-compose/docker-compose.yml -f docker-compose.override.yml up -d # If you need the testing profile (mock-vllm) -docker compose -f docker-compose.yml -f docker-compose.override.yml --profile testing up -d +docker compose -f deploy/docker-compose/docker-compose.yml -f docker-compose.override.yml --profile testing up -d ``` ## 5. Kubernetes clusters with limited egress