Skip to content

Commit a0f8114

Browse files
committed
feat: support docker-compose
Signed-off-by: bitliu <[email protected]>
1 parent 6d3a624 commit a0f8114

File tree

6 files changed

+386
-2
lines changed

6 files changed

+386
-2
lines changed

config/envoy-docker.yaml

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
---
# Envoy configuration for the docker-compose deployment.
# Listens on :8801, forwards every request through the semantic-router
# ext_proc service (gRPC, :50051), then routes to the backend chosen by
# the router via the x-semantic-destination-endpoint header.
static_resources:
  listeners:
    - name: listener_0
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 8801
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_http
                access_log:
                  - name: envoy.access_loggers.stdout
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
                      log_format:
                        json_format:
                          time: "%START_TIME%"
                          protocol: "%PROTOCOL%"
                          request_method: "%REQ(:METHOD)%"
                          request_path: "%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
                          response_code: "%RESPONSE_CODE%"
                          response_flags: "%RESPONSE_FLAGS%"
                          bytes_received: "%BYTES_RECEIVED%"
                          bytes_sent: "%BYTES_SENT%"
                          duration: "%DURATION%"
                          upstream_host: "%UPSTREAM_HOST%"
                          upstream_cluster: "%UPSTREAM_CLUSTER%"
                          upstream_local_address: "%UPSTREAM_LOCAL_ADDRESS%"
                          request_id: "%REQ(X-REQUEST-ID)%"
                          # Headers injected by the semantic-router ext_proc.
                          selected_model: "%REQ(X-SELECTED-MODEL)%"
                          selected_endpoint: "%REQ(X-SEMANTIC-DESTINATION-ENDPOINT)%"
                route_config:
                  name: local_route
                  virtual_hosts:
                    - name: local_service
                      domains: ["*"]
                      routes:
                        # Single route using original destination cluster
                        - match:
                            prefix: "/"
                          route:
                            cluster: vllm_dynamic_cluster
                            # Long timeout to accommodate slow LLM completions.
                            timeout: 300s
                http_filters:
                  - name: envoy.filters.http.ext_proc
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
                      grpc_service:
                        envoy_grpc:
                          cluster_name: extproc_service
                      allow_mode_override: true
                      processing_mode:
                        request_header_mode: "SEND"
                        response_header_mode: "SEND"
                        # Bodies are buffered so the router can classify the
                        # full prompt before a routing decision is made.
                        request_body_mode: "BUFFERED"
                        response_body_mode: "BUFFERED"
                        request_trailer_mode: "SKIP"
                        response_trailer_mode: "SKIP"
                      # If the ext_proc service is unavailable, pass traffic
                      # through instead of failing requests.
                      failure_mode_allow: true
                      message_timeout: 300s
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
                      suppress_envoy_headers: true
                http2_protocol_options:
                  max_concurrent_streams: 100
                  initial_stream_window_size: 65536
                  initial_connection_window_size: 1048576
                stream_idle_timeout: "300s"
                request_timeout: "300s"
                common_http_protocol_options:
                  idle_timeout: "300s"

  clusters:
    # gRPC cluster for the semantic-router external processor.
    - name: extproc_service
      connect_timeout: 300s
      per_connection_buffer_limit_bytes: 52428800
      # STRICT_DNS (not STATIC): the endpoint address below is the Docker
      # Compose service name "semantic-router". STATIC clusters require
      # literal IP addresses and Envoy would reject this config at startup;
      # STRICT_DNS resolves the name via Docker's embedded DNS.
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            # ext_proc is gRPC, so the upstream must speak HTTP/2.
            http2_protocol_options:
              connection_keepalive:
                interval: 300s
                timeout: 300s
      load_assignment:
        cluster_name: extproc_service
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: semantic-router  # Use Docker service name
                      port_value: 50051

    # Dynamic vLLM cluster using original destination
    - name: vllm_dynamic_cluster
      connect_timeout: 300s
      per_connection_buffer_limit_bytes: 52428800
      # ORIGINAL_DST + header override: the upstream host:port is taken from
      # the x-semantic-destination-endpoint header set by the ext_proc.
      type: ORIGINAL_DST
      lb_policy: CLUSTER_PROVIDED
      original_dst_lb_config:
        use_http_header: true
        http_header_name: "x-semantic-destination-endpoint"
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http_protocol_options: {}

# Admin interface; also used by the compose healthcheck (/ready).
admin:
  address:
    socket_address:
      address: "0.0.0.0"
      port_value: 19000

docker-compose.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
---
# Docker Compose stack: semantic-router (ext_proc gRPC service) + Envoy proxy.
# Envoy listens on 8801 and consults semantic-router on 50051 for routing.
# NOTE: the `version` key is obsolete in Compose v2 and ignored; kept for
# compatibility with older docker-compose binaries.
version: '3.8'

services:
  # Semantic Router External Processor Service
  semantic-router:
    build:
      context: .
      dockerfile: Dockerfile.extproc
    container_name: semantic-router
    ports:
      - "50051:50051"
    volumes:
      # Config and pre-downloaded models are mounted read-only from the host.
      - ./config:/app/config:ro
      - ./models:/app/models:ro
    environment:
      - LD_LIBRARY_PATH=/app/lib
    networks:
      - semantic-network
    healthcheck:
      # Probe the gRPC port; assumes `nc` is present in the built image —
      # TODO confirm against Dockerfile.extproc.
      test: ["CMD", "nc", "-z", "localhost", "50051"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s

  # Envoy Proxy Service
  envoy:
    image: envoyproxy/envoy:v1.31.7
    container_name: envoy-proxy
    ports:
      - "8801:8801"  # Main proxy port
      - "19000:19000"  # Admin interface
    volumes:
      - ./config/envoy-docker.yaml:/etc/envoy/envoy.yaml:ro
    command: ["/usr/local/bin/envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "ext_proc:trace,router:trace,http:trace"]
    depends_on:
      # Don't start Envoy until the router's healthcheck passes, so the
      # ext_proc cluster is reachable immediately.
      semantic-router:
        condition: service_healthy
    networks:
      - semantic-network
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:19000/ready"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s

networks:
  semantic-network:
    driver: bridge

volumes:
  # NOTE(review): declared but mounted by no service above — either wire it
  # into semantic-router's /app/models or remove it. The docs also mention a
  # `testing` profile with a mock vLLM (port 60000) that is not defined here.
  models-cache:
    driver: local

docker/README.md

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Docker Compose Quick Start Guide
2+
3+
This Docker Compose configuration allows you to quickly run Semantic Router + Envoy proxy locally.
4+
5+
## Prerequisites
6+
7+
- Docker and Docker Compose
8+
- Ensure ports 8801, 50051, 19000, and 60000 are not in use
9+
10+
## Install with Docker Compose
11+
12+
1. **Clone the repository and navigate to the project directory**
13+
```bash
14+
git clone <repository-url>
15+
cd semantic_router
16+
```
17+
18+
2. **Download required models** (if not already present):
19+
```bash
20+
make download-models
21+
```
22+
This will download the necessary ML models for classification:
23+
- Category classifier (ModernBERT-base)
24+
- PII classifier (ModernBERT-base)
25+
- Jailbreak classifier (ModernBERT-base)
26+
27+
3. **Start the services using Docker Compose**
28+
```bash
29+
# Start core services (semantic-router + envoy)
30+
docker-compose up --build
31+
32+
# Or run in background
33+
docker-compose up --build -d
34+
35+
# Start with testing services (includes mock vLLM; requires the optional
# `testing` profile to be defined in docker-compose.yml)
36+
docker-compose --profile testing up --build
37+
```
38+
39+
4. **Verify the installation**
40+
- Semantic Router: http://localhost:50051 (gRPC service)
41+
- Envoy Proxy: http://localhost:8801 (main endpoint)
42+
- Envoy Admin: http://localhost:19000 (admin interface)
43+
- Mock vLLM (testing): http://localhost:60000 (if using testing profile)
44+
45+
## Quick Start
46+
47+
### 1. Build and Start Services
48+
49+
```bash
50+
# Start core services (semantic-router + envoy)
51+
docker-compose up --build
52+
53+
# Or run in background
54+
docker-compose up --build -d
55+
```
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
# Install with Docker Compose
2+
3+
This guide shows you how to quickly set up and run Semantic Router with Envoy using Docker Compose. This is the fastest way to get started without installing dependencies locally.
4+
5+
## Prerequisites
6+
7+
- **Docker**: Version 20.10 or higher
8+
- **Docker Compose**: Version 2.0 or higher
9+
- Available ports: 8801 (Envoy), 50051 (Router), 19000 (Admin)
10+
11+
## Quick Start
12+
13+
### 1. Clone the Repository
14+
15+
```bash
16+
git clone https://github.com/your-org/semantic-router.git
17+
cd semantic-router
18+
```
19+
20+
### 2. Download Models (Optional but Recommended)
21+
22+
```bash
23+
# Install HuggingFace CLI if not already installed
24+
pip install huggingface_hub
25+
26+
# Download pre-trained models
27+
make download-models
28+
```
29+
30+
**Note**: If you skip this step, the models will be downloaded during the first container startup, which may take longer.
31+
32+
### 3. Start Services
33+
34+
```bash
35+
# Start core services (Semantic Router + Envoy)
36+
docker-compose up --build
37+
38+
# Or run in background
39+
docker-compose up --build -d
40+
```
41+
42+
### 4. Verify Installation
43+
44+
Check that services are running:
45+
46+
```bash
47+
# Check service status
48+
docker-compose ps
49+
50+
# Check logs
51+
docker-compose logs -f
52+
```
53+
54+
You should see both services healthy:
55+
- `semantic-router` on port 50051
56+
- `envoy-proxy` on port 8801
57+
58+
## Testing Your Setup
59+
60+
### Basic Test
61+
62+
```bash
63+
# Test automatic model selection
64+
curl -X POST http://localhost:8801/v1/chat/completions \
65+
-H "Authorization: Bearer e9197711aa400477d30fe1ff07679e" \
66+
-H "Content-Type: application/json" \
67+
-d '{
68+
"model": "auto",
69+
"messages": [
70+
{"role": "system", "content": "You are a helpful assistant."},
71+
{"role": "user", "content": "Who are you?"}
72+
]
73+
}'
74+
```
75+
76+
### Math Reasoning Test
77+
78+
```bash
79+
# Test reasoning-enabled routing for math problems
80+
curl -X POST http://localhost:8801/v1/chat/completions \
81+
-H "Authorization: Bearer e9197711aa400477d30fe1ff07679e" \
82+
-H "Content-Type: application/json" \
83+
-d '{
84+
"model": "auto",
85+
"messages": [
86+
{"role": "system", "content": "You are a professional math teacher."},
87+
{"role": "user", "content": "What is the derivative of f(x) = x^3 + 2x^2 - 5x + 7?"}
88+
]
89+
}'
90+
```
91+
92+
## Configuration
93+
94+
### Backend Endpoints
95+
96+
Edit `config/config.yaml` to configure your LLM endpoints:
97+
98+
```yaml
99+
vllm_endpoints:
100+
- name: "your-endpoint"
101+
address: "your-llm-server.com" # Replace with your server
102+
port: 11434 # Replace with your port
103+
models:
104+
- "your-model-name" # Replace with your model
105+
weight: 1
106+
107+
model_config:
108+
"your-model-name":
109+
param_count: 671000000000
110+
batch_size: 512.0
111+
context_size: 65536.0
112+
preferred_endpoints: ["your-endpoint"]
113+
```
114+
115+
After updating the configuration, restart the services:
116+
117+
```bash
118+
docker-compose restart
119+
```
120+
121+
## Development Mode
122+
123+
### Live Development
124+
125+
To rebuild and restart after code changes:
126+
127+
```bash
128+
# Rebuild specific service
129+
docker-compose build semantic-router
130+
131+
# Restart with new build
132+
docker-compose up --build semantic-router
133+
```
134+
135+
## Monitoring and Debugging
136+
137+
### Service Logs
138+
139+
```bash
140+
# View all logs
141+
docker-compose logs -f
142+
143+
# View specific service logs
144+
docker-compose logs -f semantic-router
145+
docker-compose logs -f envoy
146+
```
147+
148+
### Envoy Admin Interface
149+
150+
Access Envoy's admin interface at: http://localhost:19000
151+
152+
Useful endpoints:
153+
- `/stats` - Service statistics
154+
- `/config_dump` - Current configuration
155+
- `/clusters` - Upstream cluster status

website/docs/getting-started/installation.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Local Install
1+
# Install Locally
22

33
This guide will help you set up and install the Semantic Router on your system. The router runs entirely on CPU and does not require GPU for inference.
44

0 commit comments

Comments
 (0)