Skip to content

Commit b4fbc78

Browse files
committed
Add initial configuration for GLM service with health checks and logging
0 parents  commit b4fbc78

File tree

2 files changed

+166
-0
lines changed

2 files changed

+166
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
name: Validate Compose Files

on:
  push:
  pull_request:

jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Validate docker compose files
        run: |
          exit_code=0
          # nullglob: if no compose files match, skip the loop entirely
          # instead of iterating once over the literal pattern string
          # (which would make `docker compose` fail with a confusing error).
          shopt -s nullglob
          for file in *.yaml *.yml; do
            echo "Validating $file..."
            if ! docker compose -f "$file" config --format=yaml > /dev/null 2>&1; then
              echo "::error file=$file::Invalid compose file"
              # Re-run without silencing output so the parse error shows in the log.
              docker compose -f "$file" config --format=yaml
              exit_code=1
            else
              echo "$file OK"
            fi
          done
          exit $exit_code

GLM-4.7.yaml

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
# Shared json-file logging settings; the "labels" option forwards the
# Datadog autodiscovery label into the log driver's per-line metadata.
x-logging-conf: &logging-conf
  driver: "json-file"
  options:
    max-size: "100m"
    max-file: "10"
    labels: "com.datadoghq.ad.logs"

# Health = the vLLM OpenAI-compatible API answering GET /v1/models.
# start_period is a full hour to cover model download + weight loading
# before failed probes start counting against `retries`.
x-vllm-healthcheck: &vllm-healthcheck
  test: ["CMD", "curl", "-f", "http://localhost:8000/v1/models"]
  interval: 10s
  timeout: 10s
  retries: 100
  start_period: 3600s

# GPU runtime settings shared by every NVIDIA-backed service.
x-nvidia: &nvidia
  runtime: nvidia
  ipc: host
  privileged: true
  ulimits:
    memlock: -1
    nofile:
      soft: 65535
      hard: 65535

# Base definition for vLLM model servers.
# NOTE(review): "hugginface_cache" is a typo (missing the second "g"), but
# it is used consistently with the `volumes:` section below, so renaming it
# would orphan any existing cached model data — leave as-is unless migrating.
x-vllm-common: &vllm-common
  <<: *nvidia
  volumes:
    - hugginface_cache:/root/.cache/huggingface
    - vllm_cache:/root/.cache/vllm
  healthcheck: *vllm-healthcheck
  restart: unless-stopped
  logging: *logging-conf

# Base definition for the authenticating proxy that fronts a vLLM instance.
# The `<<:` merge is shallow and explicit keys always win over merged-in
# keys regardless of where the merge key appears in the mapping.
x-vllm-proxy-common: &vllm-proxy-common
  image: nearaidev/vllm-proxy@sha256:7fff3d0446a01609e6a45105ef60777bb6038805161793b50c2d8e4a34ac537b
  user: root
  <<: *nvidia
  volumes:
    - /var/run/dstack.sock:/var/run/dstack.sock
  restart: unless-stopped
  environment:
    - NVIDIA_VISIBLE_DEVICES=all
  logging: *logging-conf
services:
  # Host-level Datadog agent: container log collection, process metrics,
  # DogStatsD, and an OTLP gRPC receiver on 4317.
  datadog-agent:
    image: datadog/agent@sha256:0920550d798e459025620e6c3f9b0e857db94b9f29762a4e194a4a3967037498
    container_name: datadog-agent
    environment:
      - DD_API_KEY=${DD_API_KEY}
      - DD_SITE=us3.datadoghq.com
      - DD_ENV=prod
      - DD_LOGS_ENABLED=true
      - DD_LOGS_CONFIG_CONTAINER_COLLECT_ALL=true
      # In list-form environment entries everything after '=' is literal:
      # wrapping the value in quotes would make the quotes part of the value
      # and the agent's self-exclusion filter would never match.
      - DD_CONTAINER_EXCLUDE_LOGS=name:datadog-agent
      - DD_PROCESS_AGENT_ENABLED=true
      - DD_DOGSTATSD_NON_LOCAL_TRAFFIC=true
      - DD_HOSTNAME=${DD_HOSTNAME}
      - DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_ENDPOINT=0.0.0.0:4317
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /proc/:/host/proc/:ro
      - /sys/fs/cgroup/:/host/sys/fs/cgroup:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /run/log/journal:/run/log/journal:ro
      - /run/systemd/:/host/run/systemd/:ro
    configs:
      - source: journald_config_file
        target: /etc/datadog-agent/conf.d/journald.d/conf.yaml
        # NOTE(review): 0-prefixed octal is YAML 1.1 syntax; compose's parser
        # accepts it, but quoting ("0755") is the unambiguous form — confirm
        # before changing, as the schema type differs across compose versions.
        mode: 0755
    restart: unless-stopped
    logging: *logging-conf

  # Token-authenticating proxy in front of the GLM vLLM server; the only
  # port published to the host.
  vllm-proxy-glm:
    <<: *vllm-proxy-common
    container_name: vllm-proxy-glm
    ports:
      - "8000:8000"
    environment:
      - MODEL_NAME=zai-org/GLM-4.7
      - TOKEN=${PROXY_TOKEN}
      - VLLM_BASE_URL=http://vllm-glm:8000
    labels:
      com.datadoghq.ad.logs: '[{"source": "vllm-proxy", "service": "vllm-proxy", "tags": ["model:zai-org/GLM-4.7", "ip:${HOST_IP}", "port:8000"]}]'

  # vLLM OpenAI-compatible server for GLM-4.7, tensor-parallel across 8 GPUs,
  # with LMCache KV offload and MTP speculative decoding.
  # Cache volumes, healthcheck, restart policy, and logging are inherited from
  # *vllm-common (re-declaring the identical volumes list here was redundant:
  # the merge is shallow, so an explicit volumes: replaces — never extends —
  # the inherited one).
  vllm-glm:
    <<: *vllm-common
    image: lmcache/vllm-openai@sha256:03a8cbda016be1ab5660d1e2910549cbadea85b1111a34572544c1e180538e8b
    container_name: vllm-glm
    # NOTE(review): the "128K"/"32K" human-readable suffixes require a vLLM
    # version with human-readable integer parsing — confirm the pinned image
    # supports them, otherwise spell out 131072 / 32768.
    command: >
      zai-org/GLM-4.7
      --tensor-parallel-size 8
      --speculative-config '{"method":"mtp","num_speculative_tokens":1}'
      --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}'
      --max-model-len 128K
      --max-num-batched-tokens 32K
      --max-num-seqs 128
      --stream-interval 6
      --reasoning-parser glm45
      --tool-call-parser glm47
      --enable-auto-tool-choice
    environment:
      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
      - VLLM_LOGGING_LEVEL=INFO
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - OPENBLAS_L2_SIZE=2097152
      - NCCL_DEBUG=INFO
      - VLLM_CACHE_ROOT=/root/.cache/vllm
      - TORCH_FLOAT32_MATMUL_PRECISION=high
      - LMCACHE_CHUNK_SIZE=256
      - LMCACHE_LOCAL_CPU=True
      - LMCACHE_MAX_LOCAL_CPU_SIZE=100
      # Deterministic hashing for reproducible prefix-cache behavior.
      - PYTHONHASHSEED=0
      - VLLM_RPC_TIMEOUT=60000
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0", "1", "2", "3", "4", "5", "6", "7"]
              capabilities: [gpu]
    labels:
      com.datadoghq.ad.check_names: '["vllm"]'
      com.datadoghq.ad.init_configs: "[{}]"
      com.datadoghq.ad.logs: '[{"source": "vllm", "service": "vllm", "tags":["model:zai-org/GLM-4.7","ip:${HOST_IP}", "port:8000"]}]'
      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://vllm-glm:8000/metrics", "service": "vllm-glm", "tags":["model:zai-org/GLM-4.7","ip:${HOST_IP}", "port:8000"]}]'
volumes:
  # NOTE(review): "hugginface" is a typo for "huggingface", but the name is
  # referenced consistently by the services above; renaming it would detach
  # any existing cached model weights.
  hugginface_cache:
  vllm_cache:

configs:
  # Inline Datadog journald integration config, mounted into the agent at
  # /etc/datadog-agent/conf.d/journald.d/conf.yaml.
  journald_config_file:
    content: |
      logs:
        - type: journald
          container_mode: true

0 commit comments

Comments
 (0)