Skip to content

Commit 56977b0

Browse files
feat(e2e): Add Dynamo E2E test profile with GPU support
- Add Dynamo profile for GPU-enabled disaggregated vLLM deployment
- Add GPU setup integration in Kind cluster (nvidia runtime, library copy, device plugin)
- Add DynamoGraphDeployment with Frontend + Prefill + Decode workers
- Add Dynamo test cases (health check, GPU utilization, performance)
- Fix relative paths in runner.go and profile.go
- Re-enable teardown after tests
- Remove Dynamo from CI matrix (requires GPU, run manually)
- Update README with GPU requirements (3 GPUs minimum)
- Remove unused files (namespace.yaml, kustomization.yaml, nvkind-gpu-setup-rhel.md)

Requires: VM with 3+ NVIDIA GPUs
Run: make e2e-test-dynamo

Signed-off-by: abdallahsamabd <[email protected]>
1 parent 4e2ee29 commit 56977b0

File tree

19 files changed

+3232
-10
lines changed

19 files changed

+3232
-10
lines changed

.github/workflows/integration-test-k8s.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@ on:
1212
jobs:
1313
integration-test:
1414
runs-on: ubuntu-latest
15-
timeout-minutes: 60
15+
timeout-minutes: 75
16+
strategy:
17+
fail-fast: false # Continue testing other profiles even if one fails
18+
matrix:
19+
profile: [ai-gateway]
1620

1721
steps:
1822
- name: Check out the repo
@@ -61,11 +65,11 @@ jobs:
6165
run: |
6266
make build-e2e
6367
64-
- name: Run Integration E2E tests
68+
- name: Run Integration E2E tests (${{ matrix.profile }})
6569
id: e2e-test
6670
run: |
6771
set +e # Don't exit on error, we want to capture the result
68-
make e2e-test E2E_PROFILE=ai-gateway E2E_VERBOSE=true E2E_KEEP_CLUSTER=false
72+
make e2e-test E2E_PROFILE=${{ matrix.profile }} E2E_VERBOSE=true E2E_KEEP_CLUSTER=false
6973
TEST_EXIT_CODE=$?
7074
echo "test_exit_code=${TEST_EXIT_CODE}" >> $GITHUB_OUTPUT
7175
exit ${TEST_EXIT_CODE}
@@ -74,7 +78,7 @@ jobs:
7478
if: always()
7579
uses: actions/upload-artifact@v4
7680
with:
77-
name: test-reports
81+
name: test-reports-${{ matrix.profile }}
7882
path: |
7983
test-report.json
8084
test-report.md

deploy/kubernetes/dynamo/dynamo-resources/README.md

Lines changed: 628 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
---
# Disaggregated vLLM deployment for Dynamo on a GPU-enabled Kind cluster
# with NVIDIA support.
#
# Topology (one GPU requested per service, 3 GPUs required in total):
#   - Frontend:          HTTP API server          (CUDA_VISIBLE_DEVICES=0)
#   - VLLMPrefillWorker: prefill-only vLLM worker (CUDA_VISIBLE_DEVICES=1)
#   - VLLMDecodeWorker:  decode-only vLLM worker  (CUDA_VISIBLE_DEVICES=2)
# Any additional GPU on the host is left unused.
#
# Each container mounts the host's NVIDIA driver libraries from
# /nvidia-driver-libs and /dev, and runs privileged so the Kind node can
# expose the GPUs — acceptable for this E2E test setup only.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: vllm
  namespace: dynamo-system
spec:
  backendFramework: vllm
  envs:
    - name: DYN_LOG
      value: "info"
  services:
    # HTTP API server; probes target its public port 8000.
    Frontend:
      dynamoNamespace: dynamo-vllm
      componentType: frontend
      replicas: 1
      resources:
        requests:
          cpu: "1"
          memory: "4Gi"
          gpu: "1"
        limits:
          cpu: "2"
          memory: "8Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1.post1
          command:
            - /bin/sh
            - -c
          # sleep gives etcd/NATS time to come up before the frontend starts.
          args:
            - "sleep 15 && export CUDA_VISIBLE_DEVICES=0 && export LD_LIBRARY_PATH=/nvidia-driver-libs:/usr/local/cuda/lib64:$LD_LIBRARY_PATH && python3 -m dynamo.frontend --http-port 8000"
          securityContext:
            privileged: true
          livenessProbe:
            tcpSocket:
              port: 8000
            initialDelaySeconds: 60
            periodSeconds: 30
            failureThreshold: 5
          readinessProbe:
            tcpSocket:
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
            failureThreshold: 10
          startupProbe:
            tcpSocket:
              port: 8000
            initialDelaySeconds: 20
            periodSeconds: 10
            failureThreshold: 30
          env:
            - name: ETCD_ENDPOINTS
              value: "dynamo-platform-etcd.dynamo-system.svc.cluster.local:2379"
            - name: NATS_URL
              value: "nats://dynamo-platform-nats.dynamo-system.svc.cluster.local:4222"
            - name: NATS_SERVER
              value: "nats://dynamo-platform-nats.dynamo-system:4222"
            - name: DYN_SYSTEM_ENABLED
              value: "true"
            - name: DYN_SYSTEM_PORT
              value: "9090"
            - name: LD_LIBRARY_PATH
              value: "/nvidia-driver-libs:/usr/local/cuda/lib64"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
          volumeMounts:
            - name: nvidia-driver-libs
              mountPath: /nvidia-driver-libs
              readOnly: true
            - name: dev
              mountPath: /dev
        volumes:
          - name: nvidia-driver-libs
            hostPath:
              path: /nvidia-driver-libs
          - name: dev
            hostPath:
              path: /dev

    # Prefill-only vLLM worker; probes target the Dynamo system port 9090.
    # Longer probe delays than the frontend: model download/load takes time.
    VLLMPrefillWorker:
      dynamoNamespace: dynamo-vllm
      componentType: worker
      replicas: 1
      resources:
        requests:
          cpu: "1"
          memory: "4Gi"
          gpu: "1"
        limits:
          cpu: "2"
          memory: "8Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1.post1
          command:
            - /bin/sh
            - -c
          args:
            - "sleep 15 && export CUDA_VISIBLE_DEVICES=1 && export LD_LIBRARY_PATH=/nvidia-driver-libs:/usr/local/cuda/lib64:$LD_LIBRARY_PATH && python3 -m dynamo.vllm --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --tensor-parallel-size 1 --enforce-eager --is-prefill-worker --connector null"
          securityContext:
            privileged: true
          livenessProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 180
            periodSeconds: 30
            failureThreshold: 5
          readinessProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 120
            periodSeconds: 10
            failureThreshold: 10
          startupProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 10
            failureThreshold: 30
          env:
            - name: ETCD_ENDPOINTS
              value: "dynamo-platform-etcd.dynamo-system.svc.cluster.local:2379"
            - name: NATS_URL
              value: "nats://dynamo-platform-nats.dynamo-system.svc.cluster.local:4222"
            - name: NATS_SERVER
              value: "nats://dynamo-platform-nats.dynamo-system:4222"
            - name: DYN_SYSTEM_ENABLED
              value: "true"
            - name: DYN_SYSTEM_PORT
              value: "9090"
            - name: LD_LIBRARY_PATH
              value: "/nvidia-driver-libs:/usr/local/cuda/lib64"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
          volumeMounts:
            - name: nvidia-driver-libs
              mountPath: /nvidia-driver-libs
              readOnly: true
            - name: dev
              mountPath: /dev
        volumes:
          - name: nvidia-driver-libs
            hostPath:
              path: /nvidia-driver-libs
          - name: dev
            hostPath:
              path: /dev

    # Decode-only vLLM worker; identical to the prefill worker except for
    # the GPU index and the absence of --is-prefill-worker.
    VLLMDecodeWorker:
      dynamoNamespace: dynamo-vllm
      componentType: worker
      replicas: 1
      resources:
        requests:
          cpu: "1"
          memory: "4Gi"
          gpu: "1"
        limits:
          cpu: "2"
          memory: "8Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1.post1
          command:
            - /bin/sh
            - -c
          args:
            - "sleep 15 && export CUDA_VISIBLE_DEVICES=2 && export LD_LIBRARY_PATH=/nvidia-driver-libs:/usr/local/cuda/lib64:$LD_LIBRARY_PATH && python3 -m dynamo.vllm --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --tensor-parallel-size 1 --enforce-eager --connector null"
          securityContext:
            privileged: true
          livenessProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 180
            periodSeconds: 30
            failureThreshold: 5
          readinessProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 120
            periodSeconds: 10
            failureThreshold: 10
          startupProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 10
            failureThreshold: 30
          env:
            - name: ETCD_ENDPOINTS
              value: "dynamo-platform-etcd.dynamo-system.svc.cluster.local:2379"
            - name: NATS_URL
              value: "nats://dynamo-platform-nats.dynamo-system.svc.cluster.local:4222"
            - name: NATS_SERVER
              value: "nats://dynamo-platform-nats.dynamo-system:4222"
            - name: DYN_SYSTEM_ENABLED
              value: "true"
            - name: DYN_SYSTEM_PORT
              value: "9090"
            - name: LD_LIBRARY_PATH
              value: "/nvidia-driver-libs:/usr/local/cuda/lib64"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
          volumeMounts:
            - name: nvidia-driver-libs
              mountPath: /nvidia-driver-libs
              readOnly: true
            - name: dev
              mountPath: /dev
        volumes:
          - name: nvidia-driver-libs
            hostPath:
              path: /nvidia-driver-libs
          - name: dev
            hostPath:
              path: /dev
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
---
# Envoy Gateway Helm values for the Dynamo E2E test profile.
# Turns on the ExtensionAPIs feature so EnvoyPatchPolicy resources are
# accepted — required for the Semantic Router integration.
config:
  envoyGateway:
    extensionApis:
      enableEnvoyPatchPolicy: true

0 commit comments

Comments
 (0)