
Commit 19986e2

Update docker-compose.yml
Signed-off-by: Shaojun Liu <[email protected]>
1 parent 74bf9ab commit 19986e2

File tree

1 file changed (+116, -22 lines)


deploy/docker-compose/docker-compose.yml

Lines changed: 116 additions & 22 deletions
@@ -19,6 +19,8 @@ services:
       - OTEL_SERVICE_NAME=vllm-semantic-router
       - HUGGINGFACE_HUB_CACHE=/root/.cache/huggingface
       - HF_HUB_ENABLE_HF_TRANSFER=1
+      - NO_PROXY=envoy-proxy,mongo,semantic-router,localhost,127.0.0.1
+      - no_proxy=envoy-proxy,mongo,semantic-router,localhost,127.0.0.1
     networks:
       - semantic-network
     healthcheck:
@@ -27,11 +29,19 @@ services:
       timeout: 5s
       retries: 5
       start_period: 30s
-
+    depends_on:
+      mock-vllm-1:
+        condition: service_healthy
+      mock-vllm-2:
+        condition: service_healthy
+
   # Envoy Proxy Service
   envoy:
     image: envoyproxy/envoy:v1.31.7
     container_name: envoy-proxy
+    environment:
+      - NO_PROXY=envoy-proxy,mongo,semantic-router,localhost,127.0.0.1
+      - no_proxy=envoy-proxy,mongo,semantic-router,localhost,127.0.0.1
     security_opt:
       - label=disable
     ports:
@@ -52,24 +62,99 @@ services:
       retries: 5
       start_period: 10s
 
-  # Mock vLLM service for testing profile
-  mock-vllm:
-    build:
-      context: ../../tools/mock-vllm
-      dockerfile: Dockerfile
-    container_name: mock-vllm
-    profiles: ["testing"]
-    ports:
-      - "8000:8000"
-    networks:
-      semantic-network:
-        ipv4_address: 172.28.0.10
-    healthcheck:
-      test: ["CMD", "curl", "-fsS", "http://localhost:8000/health"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-      start_period: 5s
+  # Mock vLLM service for testing profile
+  mock-vllm-1:
+    image: amr-registry.caas.intel.com/intelanalytics/llm-scaler-vllm:temp-b6-better
+    container_name: mock-vllm-1
+    profiles: ["testing"]
+    privileged: true
+    networks:
+      semantic-network:
+        ipv4_address: 172.28.0.30
+    devices:
+      - /dev/dri:/dev/dri # GPU device
+    volumes:
+      - /home/intel/LLM/:/llm/models/
+      - /home/intel/LLM2/:/llm/models2/
+    environment:
+      - no_proxy=localhost,127.0.0.1
+      - VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
+      - VLLM_WORKER_MULTIPROC_METHOD=spawn
+      - ZE_AFFINITY_MASK=0
+    shm_size: "32g"
+    entrypoint: /bin/bash
+    command: >
+      -c "numactl -C 0-15 python3 -m vllm.entrypoints.openai.api_server
+      --model /llm/models/DeepSeek-R1-Distill-Qwen-7B/
+      --served-model-name DeepSeek-R1-Distill-Qwen-7B
+      --dtype=float16
+      --enforce-eager
+      --port 8006
+      --host 0.0.0.0
+      --trust-remote-code
+      --disable-sliding-window
+      --gpu-memory-util=0.9
+      --no-enable-prefix-caching
+      --max-num-batched-tokens=2000
+      --disable-log-requests
+      --max-model-len=3000
+      --block-size 64
+      -tp=1
+      -pp=1
+      --quantization fp8"
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:8006/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s # Longer start period, since loading a large model takes more time
+
+  # Mock vLLM service for testing profile
+  mock-vllm-2:
+    image: amr-registry.caas.intel.com/intelanalytics/llm-scaler-vllm:temp-b6-better
+    container_name: mock-vllm-2
+    profiles: ["testing"]
+    privileged: true
+    networks:
+      semantic-network:
+        ipv4_address: 172.28.0.31
+    devices:
+      - /dev/dri:/dev/dri # GPU device
+    volumes:
+      - /home/intel/LLM/:/llm/models/
+      - /home/intel/LLM2/:/llm/models2/
+    environment:
+      - no_proxy=localhost,127.0.0.1
+      - VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
+      - VLLM_WORKER_MULTIPROC_METHOD=spawn
+      - ZE_AFFINITY_MASK=1
+    shm_size: "32g"
+    entrypoint: /bin/bash
+    command: >
+      -c "numactl -C 0-15 python3 -m vllm.entrypoints.openai.api_server
+      --model /llm/models/Qwen3-8B/
+      --served-model-name qwen3
+      --dtype=float16
+      --enforce-eager
+      --port 8007
+      --host 0.0.0.0
+      --trust-remote-code
+      --disable-sliding-window
+      --gpu-memory-util=0.9
+      --no-enable-prefix-caching
+      --max-num-batched-tokens=2000
+      --disable-log-requests
+      --max-model-len=3000
+      --block-size 64
+      -tp=1
+      -pp=1
+      --quantization fp8"
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:8007/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s # Longer start period, since loading a large model takes more time
 
   # Jaeger for distributed tracing (OTLP gRPC + UI)
   jaeger:
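
Note: the two replacement backends serve vLLM's OpenAI-compatible API on ports 8006 and 8007 at the static addresses declared above. A minimal smoke test, assuming it is run from a container attached to semantic-network (the diff maps no host ports for these services):

# List the models served by mock-vllm-1 (DeepSeek-R1-Distill-Qwen-7B)
curl -fsS http://172.28.0.30:8006/v1/models

# Minimal chat completion against mock-vllm-2 (served model name: qwen3)
curl -fsS http://172.28.0.31:8007/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "qwen3", "messages": [{"role": "user", "content": "Hello"}]}'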
@@ -105,6 +190,7 @@ services:
       - GF_SECURITY_ADMIN_USER=admin
       - GF_SECURITY_ADMIN_PASSWORD=admin
       - PROMETHEUS_URL=prometheus:9090
+      - GF_SECURITY_ALLOW_EMBEDDING=true
     ports:
       - "3000:3000"
     volumes:
@@ -135,9 +221,10 @@ services:
     networks:
       - semantic-network
 
+
   # Chat UI (Hugging Face) replacing Open WebUI
   chat-ui:
-    image: ghcr.io/huggingface/chat-ui-db:latest
+    image: ghcr.io/huggingface/chat-ui-db:sha-a2b39bc
     container_name: chat-ui
     ports:
       - "3002:3000" # Expose Chat UI on host 3002
@@ -152,7 +239,10 @@ services:
       # Optional theming (override as needed)
       - PUBLIC_APP_NAME=${PUBLIC_APP_NAME:-HuggingChat}
       - PUBLIC_APP_ASSETS=${PUBLIC_APP_ASSETS:-chatui}
-      - LOG_LEVEL=${LOG_LEVEL:-info}
+      - PUBLIC_APP_DATA_SHARING=1
+      - LOG_LEVEL=${LOG_LEVEL:-debug}
+      - NO_PROXY=envoy-proxy,mongo,semantic-router,localhost,127.0.0.1
+      - no_proxy=envoy-proxy,mongo,semantic-router,localhost,127.0.0.1
     depends_on:
       envoy:
         condition: service_started
@@ -161,13 +251,14 @@ services:
       mongo:
         condition: service_started
     healthcheck:
-      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/"]
+      test: ["CMD-SHELL", "node -e \"require('http').get('http://localhost:3000/', (res) => process.exit(res.statusCode === 200 ? 0 : 1))\""]
       interval: 10s
       timeout: 5s
       retries: 5
       start_period: 20s
     networks:
       - semantic-network
+
 
   # Open WebUI Pipelines server (executes Python pipelines)
   pipelines:
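
Note: the probe switches from wget to a Node.js one-liner, presumably because the pinned chat-ui-db image ships Node but not necessarily wget (an inference, not stated in the diff). The same check can be reproduced by hand against a running container:

# Exit status 0 means the UI answered with HTTP 200
docker exec chat-ui node -e "require('http').get('http://localhost:3000/', (res) => process.exit(res.statusCode === 200 ? 0 : 1))" && echo healthy || echo unhealthy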
@@ -195,6 +286,7 @@ services:
     image: mongo:7
     container_name: mongo
     restart: unless-stopped
+    command: ["mongod", "--setParameter", "logLevel=2"]
     volumes:
       - mongo-data:/data/db
     networks:
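
Note: the explicit mongod command raises log verbosity to level 2. One way to read the setting back from the running container (a sketch; mongosh ships with the mongo:7 image):

# Confirm the logLevel parameter set via --setParameter
docker exec mongo mongosh --quiet --eval 'db.adminCommand({ getParameter: 1, logLevel: 1 })'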
@@ -243,6 +335,8 @@ services:
       - TARGET_OPENWEBUI_URL=http://openwebui:8080
       - TARGET_CHATUI_URL=http://chat-ui:3000
       - ROUTER_CONFIG_PATH=/app/config/config.yaml
+      - NO_PROXY=grafana,chat-ui,semantic-router,prometheus,jaeger,openwebui,mongo,localhost,127.0.0.1
+      - no_proxy=grafana,chat-ui,semantic-router,prometheus,jaeger,openwebui,mongo,localhost,127.0.0.1
     volumes:
       - ../../config:/app/config:rw,z
     ports:
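
Note: the mock backends are gated behind the testing profile, and semantic-router now waits on their health before starting. A typical bring-up from deploy/docker-compose/ (a sketch; how Compose resolves depends_on targets that sit behind an inactive profile varies by version):

# Enable the profile so mock-vllm-1/2 are created and health-checked
docker compose --profile testing up -d

# Watch health state while the models load (start_period is 30s)
docker compose ps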
