# Shared logging settings, attached to services through the `*logging-conf` alias.
# Values are written without leading whitespace inside the quotes: a value such
# as " json-file" would not be recognised as a logging driver name.
x-logging-conf: &logging-conf
  driver: "json-file"
  options:
    # Rotate each container log at 100 MB and keep at most 10 rotated files.
    max-size: "100m"
    max-file: "10"
    # Container label key to include alongside each log entry.
    labels: "com.datadoghq.ad.logs"
7+
# Reusable healthcheck, referenced through the `*vllm-healthcheck` alias.
# Runs curl inside the container against the HTTP API on port 8000; `-f` makes
# curl exit non-zero on an HTTP error status.
x-vllm-healthcheck: &vllm-healthcheck
  test:
    - "CMD"
    - "curl"
    - "-f"
    - "http://localhost:8000/v1/models"
  interval: 10s
  timeout: 10s
  retries: 100
  # Probe failures during the first hour are not counted towards `retries`
  # (presumably to leave time for model download/load — confirm).
  start_period: 3600s
14+
# Container settings shared by the GPU-facing services (alias: `*nvidia`).
x-nvidia: &nvidia
  runtime: nvidia      # use the container runtime registered as "nvidia"
  ipc: host            # share the host IPC namespace
  privileged: true
  ulimits:
    memlock: -1        # -1 = no limit on locked memory
    nofile: { soft: 65535, hard: 65535 }
24+
# Base definition for vLLM inference services (alias: `*vllm-common`).
# Pulls in the `*nvidia` settings, then adds cache volumes, the shared
# healthcheck, the restart policy and the shared logging configuration.
x-vllm-common: &vllm-common
  <<: *nvidia
  restart: unless-stopped
  logging: *logging-conf
  healthcheck: *vllm-healthcheck
  volumes:
    # Named volumes declared under the top-level `volumes:` key.
    - hugginface_cache:/root/.cache/huggingface
    - vllm_cache:/root/.cache/vllm
33+
# Base definition for vLLM proxy services (alias: `*vllm-proxy-common`).
# NOTE: the YAML merge key (`<<`) is shallow. A service that merges this anchor
# and declares its own `environment` or `volumes` replaces the list below
# entirely instead of extending it.
x-vllm-proxy-common: &vllm-proxy-common
  <<: *nvidia
  # Image is pinned by digest.
  image: nearaidev/vllm-proxy@sha256:7fff3d0446a01609e6a45105ef60777bb6038805161793b50c2d8e4a34ac537b
  user: root
  restart: unless-stopped
  logging: *logging-conf
  volumes:
    - /var/run/dstack.sock:/var/run/dstack.sock
  environment:
    - NVIDIA_VISIBLE_DEVICES=all
44+
services:
  # Datadog Agent: collects container logs, process data, DogStatsD metrics and
  # OTLP (gRPC) traffic, and reads the host journal through the mounted config.
  datadog-agent:
    image: datadog/agent@sha256:0920550d798e459025620e6c3f9b0e857db94b9f29762a4e194a4a3967037498
    container_name: datadog-agent
    environment:
      - DD_API_KEY=${DD_API_KEY}
      - DD_SITE=us3.datadoghq.com
      - DD_ENV=prod
      - DD_LOGS_ENABLED=true
      - DD_LOGS_CONFIG_CONTAINER_COLLECT_ALL=true
      # No quotes around the value: in list form Compose passes everything
      # after `=` verbatim, so surrounding quotes would become part of the
      # filter string and the exclusion would not match.
      - DD_CONTAINER_EXCLUDE_LOGS=name:datadog-agent
      - DD_PROCESS_AGENT_ENABLED=true
      - DD_DOGSTATSD_NON_LOCAL_TRAFFIC=true
      - DD_HOSTNAME=${DD_HOSTNAME}
      - DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_ENDPOINT=0.0.0.0:4317
    volumes:
      # Host paths are mounted read-only.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /proc/:/host/proc/:ro
      - /sys/fs/cgroup/:/host/sys/fs/cgroup:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /run/log/journal:/run/log/journal:ro
      - /run/systemd/:/host/run/systemd/:ro
    configs:
      # Inline config defined under the top-level `configs:` key.
      - source: journald_config_file
        target: /etc/datadog-agent/conf.d/journald.d/conf.yaml
        mode: 0755
    restart: unless-stopped
    logging: *logging-conf

  # Proxy in front of the GLM vLLM backend; published on host port 8000.
  vllm-proxy-glm:
    <<: *vllm-proxy-common
    container_name: vllm-proxy-glm
    ports:
      - "8000:8000"
    environment:
      # The merge key is shallow: this list replaces the one defined on
      # `x-vllm-proxy-common`, so the GPU visibility setting is repeated here.
      - NVIDIA_VISIBLE_DEVICES=all
      - MODEL_NAME=zai-org/GLM-4.7
      - TOKEN=${PROXY_TOKEN}
      - VLLM_BASE_URL=http://vllm-glm:8000
    labels:
      com.datadoghq.ad.logs: '[{"source": "vllm-proxy", "service": "vllm-proxy", "tags": ["model:zai-org/GLM-4.7", "ip:${HOST_IP}", "port:8000"]}]'

  # vLLM server for zai-org/GLM-4.7 across GPUs 0-7 (tensor parallel size 8).
  # Volumes, healthcheck, restart policy and logging come from `*vllm-common`.
  vllm-glm:
    <<: *vllm-common
    image: lmcache/vllm-openai@sha256:03a8cbda016be1ab5660d1e2910549cbadea85b1111a34572544c1e180538e8b
    container_name: vllm-glm
    # Folded scalar: the lines below are joined into one command line.
    command: >
      zai-org/GLM-4.7
      --tensor-parallel-size 8
      --speculative-config '{"method":"mtp","num_speculative_tokens":1}'
      --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}'
      --max-model-len 128K
      --max-num-batched-tokens 32K
      --max-num-seqs 128
      --stream-interval 6
      --reasoning-parser glm45
      --tool-call-parser glm47
      --enable-auto-tool-choice
    environment:
      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
      - VLLM_LOGGING_LEVEL=INFO
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - OPENBLAS_L2_SIZE=2097152
      - NCCL_DEBUG=INFO
      - VLLM_CACHE_ROOT=/root/.cache/vllm
      - TORCH_FLOAT32_MATMUL_PRECISION=high
      - LMCACHE_CHUNK_SIZE=256
      - LMCACHE_LOCAL_CPU=True
      - LMCACHE_MAX_LOCAL_CPU_SIZE=100
      - PYTHONHASHSEED=0
      - VLLM_RPC_TIMEOUT=60000
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0", "1", "2", "3", "4", "5", "6", "7"]
              capabilities: [gpu]
    labels:
      # Datadog Autodiscovery: vllm check scraping the OpenMetrics endpoint,
      # plus log source/service tagging.
      com.datadoghq.ad.check_names: '["vllm"]'
      com.datadoghq.ad.init_configs: "[{}]"
      com.datadoghq.ad.logs: '[{"source": "vllm", "service": "vllm", "tags":["model:zai-org/GLM-4.7","ip:${HOST_IP}", "port:8000"]}]'
      com.datadoghq.ad.instances: '[{"openmetrics_endpoint":"http://vllm-glm:8000/metrics", "service": "vllm-glm", "tags":["model:zai-org/GLM-4.7","ip:${HOST_IP}", "port:8000"]}]'
130+
# Named volumes with default settings.
# `hugginface_cache` is spelled this way (sic) wherever it is mounted; keep the
# spelling in sync with those references, since renaming creates a new, empty
# volume.
volumes:
  hugginface_cache: {}
  vllm_cache: {}
134+
# Inline config file mounted into the Datadog Agent as its journald log
# integration (conf.d/journald.d/conf.yaml).
configs:
  journald_config_file:
    # Literal block scalar: the text below is the file content, verbatim.
    content: |
      logs:
        - type: journald
          container_mode: true
# (web-page footer residue from the diff view: "0 commit comments")