Skip to content

Commit 04675ff

Browse files
feat(e2e): Add Dynamo E2E test profile with GPU support
feat(e2e): Add Dynamo E2E test profile with GPU support

- Add Dynamo profile for GPU-enabled disaggregated vLLM deployment
- Add GPU setup integration in Kind cluster (nvidia runtime, library copy, device plugin)
- Add DynamoGraphDeployment with Frontend + Prefill + Decode workers
- Add Dynamo test cases (health check, GPU utilization, performance)
- Fix relative paths in runner.go and profile.go
- Re-enable teardown after tests
- Remove Dynamo from CI matrix (requires GPU, run manually)
- Update README with GPU requirements (3 GPUs minimum)
- Remove unused files (namespace.yaml, kustomization.yaml, nvkind-gpu-setup-rhel.md)

Requires: VM with 3+ NVIDIA GPUs
Run: make e2e-test-dynamo

Signed-off-by: abdallahsamabd <[email protected]>
1 parent 5b412a8 commit 04675ff

File tree

25 files changed

+3291
-53
lines changed

25 files changed

+3291
-53
lines changed

.github/workflows/integration-test-k8s.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ jobs:
2828
strategy:
2929
fail-fast: false # Continue testing other profiles even if one fails
3030
matrix:
31+
# Note: dynamo profile requires GPU, run manually with: make e2e-test-dynamo
3132
profile: [ai-gateway, aibrix, routing-strategies, llm-d, istio, production-stack]
3233

3334
steps:

config/intelligent-routing/in-tree/generic_categories.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ categories:
2323
mmlu_categories: ["computer science", "engineering"]
2424
- name: finance
2525
mmlu_categories: ["economics"]
26-
- name: politics
27-
# If omitted, identity mapping applies when this name matches MMLU
26+
- name: politics # If omitted, identity mapping applies when this name matches MMLU
2827

2928
# Decisions define routing logic by combining rules and model selection
3029
decisions:

deploy/kubernetes/dynamo/dynamo-resources/README.md

Lines changed: 660 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
---
# Disaggregated vLLM Deployment for Dynamo
# GPU-enabled configuration for Kind cluster with NVIDIA support
#
# Architecture:
#   Frontend:          HTTP API server (GPU 0)
#   VLLMPrefillWorker: Specialized prefill-only worker (GPU 1)
#   VLLMDecodeWorker:  Specialized decode-only worker (GPU 2)
#
# GPU Allocation (4 GPUs total):
#   GPU 0: Frontend
#   GPU 1: Prefill Worker
#   GPU 2: Decode Worker
#   GPU 3: (spare)
# NOTE(review): the commit states "3+ GPUs" as the minimum; the diagram above
# assumes a 4-GPU host with GPU 3 unused — confirm which is authoritative.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: vllm
  namespace: dynamo-system
spec:
  backendFramework: vllm
  envs:
    - name: DYN_LOG
      value: "info"
  services:
    # Frontend - HTTP API server
    # NOTE(review): the frontend is an HTTP server yet requests a full GPU;
    # verify whether this reservation is intentional (it consumes one of the
    # three required GPUs).
    Frontend:
      dynamoNamespace: dynamo-vllm
      componentType: frontend
      replicas: 1
      resources:
        requests:
          cpu: "1"
          memory: "4Gi"
          gpu: "1"
        limits:
          cpu: "2"
          memory: "8Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1.post1
          command:
            - /bin/sh
            - -c
          # sleep gives etcd/NATS time to come up; LD_LIBRARY_PATH picks up
          # the host driver libraries copied into the Kind node.
          args:
            - "sleep 15 && export CUDA_VISIBLE_DEVICES=0 && export LD_LIBRARY_PATH=/nvidia-driver-libs:/usr/local/cuda/lib64:$LD_LIBRARY_PATH && python3 -m dynamo.frontend --http-port 8000"
          # privileged is required for direct /dev device access inside Kind.
          securityContext:
            privileged: true
          livenessProbe:
            tcpSocket:
              port: 8000
            initialDelaySeconds: 60
            periodSeconds: 30
            failureThreshold: 5
          readinessProbe:
            tcpSocket:
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
            failureThreshold: 10
          startupProbe:
            tcpSocket:
              port: 8000
            initialDelaySeconds: 20
            periodSeconds: 10
            failureThreshold: 30
          env:
            - name: ETCD_ENDPOINTS
              value: "dynamo-platform-etcd.dynamo-system.svc.cluster.local:2379"
            - name: NATS_URL
              value: "nats://dynamo-platform-nats.dynamo-system.svc.cluster.local:4222"
            - name: NATS_SERVER
              value: "nats://dynamo-platform-nats.dynamo-system:4222"
            - name: DYN_SYSTEM_ENABLED
              value: "true"
            - name: DYN_SYSTEM_PORT
              value: "9090"
            - name: LD_LIBRARY_PATH
              value: "/nvidia-driver-libs:/usr/local/cuda/lib64"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
          volumeMounts:
            - name: nvidia-driver-libs
              mountPath: /nvidia-driver-libs
              readOnly: true
            - name: dev
              mountPath: /dev
        # Host driver libraries and /dev are exposed so CUDA works inside the
        # Kind node without the full NVIDIA container toolkit.
        volumes:
          - name: nvidia-driver-libs
            hostPath:
              path: /nvidia-driver-libs
          - name: dev
            hostPath:
              path: /dev

    # VLLMPrefillWorker - Specialized prefill-only worker (GPU 1)
    VLLMPrefillWorker:
      dynamoNamespace: dynamo-vllm
      componentType: worker
      replicas: 1
      resources:
        requests:
          cpu: "1"
          memory: "4Gi"
          gpu: "1"
        limits:
          cpu: "2"
          memory: "8Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1.post1
          command:
            - /bin/sh
            - -c
          args:
            - "sleep 15 && export CUDA_VISIBLE_DEVICES=1 && export LD_LIBRARY_PATH=/nvidia-driver-libs:/usr/local/cuda/lib64:$LD_LIBRARY_PATH && python3 -m dynamo.vllm --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --tensor-parallel-size 1 --enforce-eager --is-prefill-worker --connector null"
          securityContext:
            privileged: true
          # Probes target port 9090 (DYN_SYSTEM_PORT); worker startup includes
          # model download/load, hence the long liveness initial delay.
          livenessProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 180
            periodSeconds: 30
            failureThreshold: 5
          readinessProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 120
            periodSeconds: 10
            failureThreshold: 10
          startupProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 10
            failureThreshold: 30
          env:
            - name: ETCD_ENDPOINTS
              value: "dynamo-platform-etcd.dynamo-system.svc.cluster.local:2379"
            - name: NATS_URL
              value: "nats://dynamo-platform-nats.dynamo-system.svc.cluster.local:4222"
            - name: NATS_SERVER
              value: "nats://dynamo-platform-nats.dynamo-system:4222"
            - name: DYN_SYSTEM_ENABLED
              value: "true"
            - name: DYN_SYSTEM_PORT
              value: "9090"
            - name: LD_LIBRARY_PATH
              value: "/nvidia-driver-libs:/usr/local/cuda/lib64"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
          volumeMounts:
            - name: nvidia-driver-libs
              mountPath: /nvidia-driver-libs
              readOnly: true
            - name: dev
              mountPath: /dev
        volumes:
          - name: nvidia-driver-libs
            hostPath:
              path: /nvidia-driver-libs
          - name: dev
            hostPath:
              path: /dev

    # VLLMDecodeWorker - Specialized decode-only worker (GPU 2)
    VLLMDecodeWorker:
      dynamoNamespace: dynamo-vllm
      componentType: worker
      replicas: 1
      resources:
        requests:
          cpu: "1"
          memory: "4Gi"
          gpu: "1"
        limits:
          cpu: "2"
          memory: "8Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1.post1
          command:
            - /bin/sh
            - -c
          # Same launch as the prefill worker minus --is-prefill-worker, and
          # pinned to GPU 2 via CUDA_VISIBLE_DEVICES.
          args:
            - "sleep 15 && export CUDA_VISIBLE_DEVICES=2 && export LD_LIBRARY_PATH=/nvidia-driver-libs:/usr/local/cuda/lib64:$LD_LIBRARY_PATH && python3 -m dynamo.vllm --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --tensor-parallel-size 1 --enforce-eager --connector null"
          securityContext:
            privileged: true
          livenessProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 180
            periodSeconds: 30
            failureThreshold: 5
          readinessProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 120
            periodSeconds: 10
            failureThreshold: 10
          startupProbe:
            tcpSocket:
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 10
            failureThreshold: 30
          env:
            - name: ETCD_ENDPOINTS
              value: "dynamo-platform-etcd.dynamo-system.svc.cluster.local:2379"
            - name: NATS_URL
              value: "nats://dynamo-platform-nats.dynamo-system.svc.cluster.local:4222"
            - name: NATS_SERVER
              value: "nats://dynamo-platform-nats.dynamo-system:4222"
            - name: DYN_SYSTEM_ENABLED
              value: "true"
            - name: DYN_SYSTEM_PORT
              value: "9090"
            - name: LD_LIBRARY_PATH
              value: "/nvidia-driver-libs:/usr/local/cuda/lib64"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
          volumeMounts:
            - name: nvidia-driver-libs
              mountPath: /nvidia-driver-libs
              readOnly: true
            - name: dev
              mountPath: /dev
        volumes:
          - name: nvidia-driver-libs
            hostPath:
              path: /nvidia-driver-libs
          - name: dev
            hostPath:
              path: /dev
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
---
# Envoy Gateway values for Dynamo E2E Testing
# Enables ExtensionAPIs (EnvoyPatchPolicy) for Semantic Router integration

config:
  envoyGateway:
    extensionApis:
      enableEnvoyPatchPolicy: true
8+

0 commit comments

Comments
 (0)