Skip to content

Commit a0f8114

Browse files
committed
feat: support docker-compose
Signed-off-by: bitliu <[email protected]>
1 parent 6d3a624 commit a0f8114

File tree

6 files changed

+386
-2
lines changed

6 files changed

+386
-2
lines changed

config/envoy-docker.yaml

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
---
# Envoy configuration for the docker-compose deployment.
# Listens on :8801, forwards every request through the semantic-router
# ext_proc service (gRPC, :50051), then routes to the backend chosen by
# the router via the x-semantic-destination-endpoint header.
static_resources:
  listeners:
    - name: listener_0
      address:
        socket_address:
          address: 0.0.0.0
          port_value: 8801
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_http
                access_log:
                  - name: envoy.access_loggers.stdout
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
                      log_format:
                        json_format:
                          time: "%START_TIME%"
                          protocol: "%PROTOCOL%"
                          request_method: "%REQ(:METHOD)%"
                          request_path: "%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
                          response_code: "%RESPONSE_CODE%"
                          response_flags: "%RESPONSE_FLAGS%"
                          bytes_received: "%BYTES_RECEIVED%"
                          bytes_sent: "%BYTES_SENT%"
                          duration: "%DURATION%"
                          upstream_host: "%UPSTREAM_HOST%"
                          upstream_cluster: "%UPSTREAM_CLUSTER%"
                          upstream_local_address: "%UPSTREAM_LOCAL_ADDRESS%"
                          request_id: "%REQ(X-REQUEST-ID)%"
                          # Headers injected by the semantic-router ext_proc.
                          selected_model: "%REQ(X-SELECTED-MODEL)%"
                          selected_endpoint: "%REQ(X-SEMANTIC-DESTINATION-ENDPOINT)%"
                route_config:
                  name: local_route
                  virtual_hosts:
                    - name: local_service
                      domains: ["*"]
                      routes:
                        # Single route using original destination cluster
                        - match:
                            prefix: "/"
                          route:
                            cluster: vllm_dynamic_cluster
                            # Long timeout to accommodate slow LLM completions.
                            timeout: 300s
                http_filters:
                  - name: envoy.filters.http.ext_proc
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
                      grpc_service:
                        envoy_grpc:
                          cluster_name: extproc_service
                      allow_mode_override: true
                      processing_mode:
                        request_header_mode: "SEND"
                        response_header_mode: "SEND"
                        # Bodies are buffered so the router can classify the
                        # full prompt before a routing decision is made.
                        request_body_mode: "BUFFERED"
                        response_body_mode: "BUFFERED"
                        request_trailer_mode: "SKIP"
                        response_trailer_mode: "SKIP"
                      # If the ext_proc service is unavailable, pass traffic
                      # through instead of failing requests.
                      failure_mode_allow: true
                      message_timeout: 300s
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
                      suppress_envoy_headers: true
                http2_protocol_options:
                  max_concurrent_streams: 100
                  initial_stream_window_size: 65536
                  initial_connection_window_size: 1048576
                stream_idle_timeout: "300s"
                request_timeout: "300s"
                common_http_protocol_options:
                  idle_timeout: "300s"

  clusters:
    # gRPC cluster for the semantic-router external processor.
    - name: extproc_service
      connect_timeout: 300s
      per_connection_buffer_limit_bytes: 52428800
      # STRICT_DNS (not STATIC): the endpoint address below is the Docker
      # Compose service name "semantic-router". STATIC clusters require
      # literal IP addresses and Envoy would reject this config at startup;
      # STRICT_DNS resolves the name via Docker's embedded DNS.
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            # ext_proc is gRPC, so the upstream must speak HTTP/2.
            http2_protocol_options:
              connection_keepalive:
                interval: 300s
                timeout: 300s
      load_assignment:
        cluster_name: extproc_service
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: semantic-router  # Use Docker service name
                      port_value: 50051

    # Dynamic vLLM cluster using original destination
    - name: vllm_dynamic_cluster
      connect_timeout: 300s
      per_connection_buffer_limit_bytes: 52428800
      # ORIGINAL_DST + header override: the upstream host:port is taken from
      # the x-semantic-destination-endpoint header set by the ext_proc.
      type: ORIGINAL_DST
      lb_policy: CLUSTER_PROVIDED
      original_dst_lb_config:
        use_http_header: true
        http_header_name: "x-semantic-destination-endpoint"
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http_protocol_options: {}

# Admin interface; also used by the compose healthcheck (/ready).
admin:
  address:
    socket_address:
      address: "0.0.0.0"
      port_value: 19000

docker-compose.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
---
# Docker Compose stack: semantic-router (ext_proc gRPC service) + Envoy proxy.
# Envoy listens on 8801 and consults semantic-router on 50051 for routing.
# NOTE: the `version` key is obsolete in Compose v2 and ignored; kept for
# compatibility with older docker-compose binaries.
version: '3.8'

services:
  # Semantic Router External Processor Service
  semantic-router:
    build:
      context: .
      dockerfile: Dockerfile.extproc
    container_name: semantic-router
    ports:
      - "50051:50051"
    volumes:
      # Config and pre-downloaded models are mounted read-only from the host.
      - ./config:/app/config:ro
      - ./models:/app/models:ro
    environment:
      - LD_LIBRARY_PATH=/app/lib
    networks:
      - semantic-network
    healthcheck:
      # Probe the gRPC port; assumes `nc` is present in the built image —
      # TODO confirm against Dockerfile.extproc.
      test: ["CMD", "nc", "-z", "localhost", "50051"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s

  # Envoy Proxy Service
  envoy:
    image: envoyproxy/envoy:v1.31.7
    container_name: envoy-proxy
    ports:
      - "8801:8801"  # Main proxy port
      - "19000:19000"  # Admin interface
    volumes:
      - ./config/envoy-docker.yaml:/etc/envoy/envoy.yaml:ro
    command: ["/usr/local/bin/envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "ext_proc:trace,router:trace,http:trace"]
    depends_on:
      # Don't start Envoy until the router's healthcheck passes, so the
      # ext_proc cluster is reachable immediately.
      semantic-router:
        condition: service_healthy
    networks:
      - semantic-network
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:19000/ready"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s

networks:
  semantic-network:
    driver: bridge

volumes:
  # NOTE(review): declared but mounted by no service above — either wire it
  # into semantic-router's /app/models or remove it. The docs also mention a
  # `testing` profile with a mock vLLM (port 60000) that is not defined here.
  models-cache:
    driver: local

docker/README.md

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Docker Compose Quick Start Guide
2+
3+
This Docker Compose configuration allows you to quickly run Semantic Router + Envoy proxy locally.
4+
5+
## Prerequisites
6+
7+
- Docker and Docker Compose
8+
- Ensure ports 8801, 50051, 19000, and 60000 are not in use
9+
10+
## Install with Docker Compose
11+
12+
1. **Clone the repository and navigate to the project directory**
13+
```bash
14+
git clone <repository-url>
15+
cd semantic_router
16+
```
17+
18+
2. **Download required models** (if not already present):
19+
```bash
20+
make download-models
21+
```
22+
This will download the necessary ML models for classification:
23+
- Category classifier (ModernBERT-base)
24+
- PII classifier (ModernBERT-base)
25+
- Jailbreak classifier (ModernBERT-base)
26+
27+
3. **Start the services using Docker Compose**
28+
```bash
29+
# Start core services (semantic-router + envoy)
30+
docker-compose up --build
31+
32+
# Or run in background
33+
docker-compose up --build -d
34+
35+
# Start with testing services (includes mock vLLM; requires the optional
# `testing` profile to be defined in docker-compose.yml)
36+
docker-compose --profile testing up --build
37+
```
38+
39+
4. **Verify the installation**
40+
- Semantic Router: http://localhost:50051 (gRPC service)
41+
- Envoy Proxy: http://localhost:8801 (main endpoint)
42+
- Envoy Admin: http://localhost:19000 (admin interface)
43+
- Mock vLLM (testing): http://localhost:60000 (if using testing profile)
44+
45+
## Quick Start
46+
47+
### 1. Build and Start Services
48+
49+
```bash
50+
# Start core services (semantic-router + envoy)
51+
docker-compose up --build
52+
53+
# Or run in background
54+
docker-compose up --build -d
55+
```
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
# Install with Docker Compose
2+
3+
This guide shows you how to quickly set up and run Semantic Router with Envoy using Docker Compose. This is the fastest way to get started without installing dependencies locally.
4+
5+
## Prerequisites
6+
7+
- **Docker**: Version 20.10 or higher
8+
- **Docker Compose**: Version 2.0 or higher
9+
- Available ports: 8801 (Envoy), 50051 (Router), 19000 (Admin)
10+
11+
## Quick Start
12+
13+
### 1. Clone the Repository
14+
15+
```bash
16+
git clone https://github.com/your-org/semantic-router.git
17+
cd semantic-router
18+
```
19+
20+
### 2. Download Models (Optional but Recommended)
21+
22+
```bash
23+
# Install HuggingFace CLI if not already installed
24+
pip install huggingface_hub
25+
26+
# Download pre-trained models
27+
make download-models
28+
```
29+
30+
**Note**: If you skip this step, the models will be downloaded during the first container startup, which may take longer.
31+
32+
### 3. Start Services
33+
34+
```bash
35+
# Start core services (Semantic Router + Envoy)
36+
docker-compose up --build
37+
38+
# Or run in background
39+
docker-compose up --build -d
40+
```
41+
42+
### 4. Verify Installation
43+
44+
Check that services are running:
45+
46+
```bash
47+
# Check service status
48+
docker-compose ps
49+
50+
# Check logs
51+
docker-compose logs -f
52+
```
53+
54+
You should see both services healthy:
55+
- `semantic-router` on port 50051
56+
- `envoy-proxy` on port 8801
57+
58+
## Testing Your Setup
59+
60+
### Basic Test
61+
62+
```bash
63+
# Test automatic model selection
64+
curl -X POST http://localhost:8801/v1/chat/completions \
65+
-H "Authorization: Bearer e9197711aa400477d30fe1ff07679e" \
66+
-H "Content-Type: application/json" \
67+
-d '{
68+
"model": "auto",
69+
"messages": [
70+
{"role": "system", "content": "You are a helpful assistant."},
71+
{"role": "user", "content": "Who are you?"}
72+
]
73+
}'
74+
```
75+
76+
### Math Reasoning Test
77+
78+
```bash
79+
# Test reasoning-enabled routing for math problems
80+
curl -X POST http://localhost:8801/v1/chat/completions \
81+
-H "Authorization: Bearer e9197711aa400477d30fe1ff07679e" \
82+
-H "Content-Type: application/json" \
83+
-d '{
84+
"model": "auto",
85+
"messages": [
86+
{"role": "system", "content": "You are a professional math teacher."},
87+
{"role": "user", "content": "What is the derivative of f(x) = x^3 + 2x^2 - 5x + 7?"}
88+
]
89+
}'
90+
```
91+
92+
## Configuration
93+
94+
### Backend Endpoints
95+
96+
Edit `config/config.yaml` to configure your LLM endpoints:
97+
98+
```yaml
99+
vllm_endpoints:
100+
- name: "your-endpoint"
101+
address: "your-llm-server.com" # Replace with your server
102+
port: 11434 # Replace with your port
103+
models:
104+
- "your-model-name" # Replace with your model
105+
weight: 1
106+
107+
model_config:
108+
"your-model-name":
109+
param_count: 671000000000
110+
batch_size: 512.0
111+
context_size: 65536.0
112+
preferred_endpoints: ["your-endpoint"]
113+
```
114+
115+
After updating the configuration, restart the services:
116+
117+
```bash
118+
docker-compose restart
119+
```
120+
121+
## Development Mode
122+
123+
### Live Development
124+
125+
To rebuild and restart after code changes:
126+
127+
```bash
128+
# Rebuild specific service
129+
docker-compose build semantic-router
130+
131+
# Restart with new build
132+
docker-compose up --build semantic-router
133+
```
134+
135+
## Monitoring and Debugging
136+
137+
### Service Logs
138+
139+
```bash
140+
# View all logs
141+
docker-compose logs -f
142+
143+
# View specific service logs
144+
docker-compose logs -f semantic-router
145+
docker-compose logs -f envoy
146+
```
147+
148+
### Envoy Admin Interface
149+
150+
Access Envoy's admin interface at: http://localhost:19000
151+
152+
Useful endpoints:
153+
- `/stats` - Service statistics
154+
- `/config_dump` - Current configuration
155+
- `/clusters` - Upstream cluster status

website/docs/getting-started/installation.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Local Install
1+
# Install Locally
22

33
This guide will help you set up and install the Semantic Router on your system. The router runs entirely on CPU and does not require GPU for inference.
44

0 commit comments

Comments
 (0)