Skip to content

Commit 81cf493

Browse files
authored
Merge branch 'main' into feat/vllmomni_profiling
Signed-off-by: erfgss <97771661+erfgss@users.noreply.github.com>
2 parents f7f4169 + d6a3551 commit 81cf493

File tree

484 files changed

+54090
-16015
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

484 files changed

+54090
-16015
lines changed

.buildkite/pipeline.yml

Lines changed: 176 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -9,26 +9,35 @@ steps:
99
agents:
1010
queue: "cpu_queue_premerge"
1111

12-
# - label: "Test on NPU"
13-
# depends_on: ~
14-
# key: npu-test
15-
# commands:
16-
# - ".buildkite/scripts/hardware_ci/run_npu_test.sh"
17-
# agents:
18-
# queue: "ascend"
12+
- label: "Upload Nightly Pipeline"
13+
depends_on: image-build
14+
if: build.env("NIGHTLY") == "1"
15+
commands:
16+
- buildkite-agent pipeline upload .buildkite/test-nightly.yaml
17+
agents:
18+
queue: "cpu_queue_premerge"
1919

2020
- label: "Simple Unit Test"
21-
depends_on: ~
21+
depends_on: image-build
2222
commands:
23-
- ".buildkite/scripts/simple_test.sh"
23+
- "pytest -v -s -m 'core_model and cpu' --cov=vllm_omni --cov-branch --cov-report=term-missing --cov-report=html --cov-report=xml"
2424
agents:
25-
queue: "cpu_queue_premerge"
25+
queue: "gpu_1_queue"
26+
plugins:
27+
- docker#v5.2.0:
28+
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
29+
always-pull: true
30+
propagate-environment: true
31+
environment:
32+
- "HF_HOME=/fsx/hf_cache"
33+
volumes:
34+
- "/fsx/hf_cache:/fsx/hf_cache"
2635

2736
- label: "Diffusion Model Test"
2837
timeout_in_minutes: 20
2938
depends_on: image-build
3039
commands:
31-
- pytest -s -v tests/e2e/offline_inference/test_t2i_model.py
40+
- pytest -s -v tests/e2e/offline_inference/test_t2i_model.py -m "core_model and diffusion" --run-level "core_model"
3241
agents:
3342
queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
3443
plugins:
@@ -46,6 +55,7 @@ steps:
4655
depends_on: image-build
4756
commands:
4857
- pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
58+
- pytest -s -v tests/e2e/offline_inference/test_diffusion_layerwise_offload.py
4959
agents:
5060
queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
5161
plugins:
@@ -79,7 +89,7 @@ steps:
7989
timeout_in_minutes: 15
8090
depends_on: image-build
8191
commands:
82-
- pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py
92+
- pytest -s -v -m 'core_model and cache and diffusion and not distributed_cuda and L4'
8393
agents:
8494
queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
8595
plugins:
@@ -96,7 +106,7 @@ steps:
96106
timeout_in_minutes: 20
97107
depends_on: image-build
98108
commands:
99-
- pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py
109+
- pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py -m core_model
100110
agents:
101111
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
102112
plugins:
@@ -110,11 +120,11 @@ steps:
110120
volumes:
111121
- "/fsx/hf_cache:/fsx/hf_cache"
112122

113-
- label: "Diffusion Tensor Parallelism Test"
123+
- label: "Diffusion GPU Worker Test"
114124
timeout_in_minutes: 20
115125
depends_on: image-build
116126
commands:
117-
- pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py
127+
- pytest -s -v tests/diffusion/test_diffusion_worker.py
118128
agents:
119129
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
120130
plugins:
@@ -128,109 +138,185 @@ steps:
128138
volumes:
129139
- "/fsx/hf_cache:/fsx/hf_cache"
130140

131-
- label: "Diffusion GPU Worker Test"
141+
142+
# - label: "Benchmark&Engine Test"
143+
# timeout_in_minutes: 15
144+
# depends_on: image-build
145+
# commands:
146+
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
147+
# - pytest -s -v tests/benchmarks/test_serve_cli.py
148+
# - pytest -s -v tests/engine/test_async_omni_engine_abort.py
149+
# agents:
150+
# queue: "mithril-h100-pool"
151+
# plugins:
152+
# - kubernetes:
153+
# podSpec:
154+
# containers:
155+
# - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
156+
# resources:
157+
# limits:
158+
# nvidia.com/gpu: 2
159+
# volumeMounts:
160+
# - name: devshm
161+
# mountPath: /dev/shm
162+
# - name: hf-cache
163+
# mountPath: /root/.cache/huggingface
164+
# env:
165+
# - name: HF_HOME
166+
# value: /root/.cache/huggingface
167+
# nodeSelector:
168+
# node.kubernetes.io/instance-type: gpu-h100-sxm
169+
# volumes:
170+
# - name: devshm
171+
# emptyDir:
172+
# medium: Memory
173+
# - name: hf-cache
174+
# hostPath:
175+
# path: /mnt/hf-cache
176+
# type: DirectoryOrCreate
177+
178+
- label: "Omni Model Test"
132179
timeout_in_minutes: 20
133180
depends_on: image-build
134181
commands:
135-
- pytest -s -v tests/diffusion/test_gpu_diffusion_worker.py
182+
- export VLLM_LOGGING_LEVEL=DEBUG
183+
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
184+
- pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
136185
agents:
137186
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
138187
plugins:
139188
- docker#v5.2.0:
140189
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
141190
always-pull: true
142191
propagate-environment: true
143-
shm-size: "8gb"
144192
environment:
145193
- "HF_HOME=/fsx/hf_cache"
146194
volumes:
147195
- "/fsx/hf_cache:/fsx/hf_cache"
148196

149-
- label: "Omni Model Test"
150-
timeout_in_minutes: 15
197+
# - label: "Omni Model Test with H100"
198+
# timeout_in_minutes: 20
199+
# depends_on: image-build
200+
# commands:
201+
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
202+
# - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
203+
# - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
204+
# - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py -m "core_model" --run-level "core_model"
205+
# agents:
206+
# queue: "mithril-h100-pool"
207+
# plugins:
208+
# - kubernetes:
209+
# podSpec:
210+
# containers:
211+
# - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
212+
# resources:
213+
# limits:
214+
# nvidia.com/gpu: 2
215+
# volumeMounts:
216+
# - name: devshm
217+
# mountPath: /dev/shm
218+
# - name: hf-cache
219+
# mountPath: /root/.cache/huggingface
220+
# env:
221+
# - name: HF_HOME
222+
# value: /root/.cache/huggingface
223+
# nodeSelector:
224+
# node.kubernetes.io/instance-type: gpu-h100-sxm
225+
# volumes:
226+
# - name: devshm
227+
# emptyDir:
228+
# medium: Memory
229+
# - name: hf-cache
230+
# hostPath:
231+
# path: /mnt/hf-cache
232+
# type: DirectoryOrCreate
233+
234+
- label: "Qwen3-TTS E2E Test"
235+
timeout_in_minutes: 20
151236
depends_on: image-build
152237
commands:
153238
- export VLLM_LOGGING_LEVEL=DEBUG
154239
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
155-
- pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
240+
- pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py
156241
agents:
157-
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
242+
queue: "gpu_4_queue"
158243
plugins:
159244
- docker#v5.2.0:
160245
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
161246
always-pull: true
162247
propagate-environment: true
248+
shm-size: "8gb"
163249
environment:
164250
- "HF_HOME=/fsx/hf_cache"
165251
volumes:
166252
- "/fsx/hf_cache:/fsx/hf_cache"
167253

168-
- label: "Omni Model Test with H100"
169-
timeout_in_minutes: 30
170-
depends_on: image-build
171-
commands:
172-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
173-
- pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py tests/e2e/online_serving/test_async_omni.py
174-
agents:
175-
queue: "mithril-h100-pool"
176-
plugins:
177-
- kubernetes:
178-
podSpec:
179-
containers:
180-
- image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
181-
resources:
182-
limits:
183-
nvidia.com/gpu: 2
184-
volumeMounts:
185-
- name: devshm
186-
mountPath: /dev/shm
187-
- name: hf-cache
188-
mountPath: /root/.cache/huggingface
189-
env:
190-
- name: HF_HOME
191-
value: /root/.cache/huggingface
192-
nodeSelector:
193-
node.kubernetes.io/instance-type: gpu-h100-sxm
194-
volumes:
195-
- name: devshm
196-
emptyDir:
197-
medium: Memory
198-
- name: hf-cache
199-
hostPath:
200-
path: /mnt/hf-cache
201-
type: DirectoryOrCreate
254+
# - label: "Diffusion Image Edit Test with H100 (1 GPU)"
255+
# timeout_in_minutes: 20
256+
# depends_on: image-build
257+
# commands:
258+
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
259+
# - pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py
260+
# agents:
261+
# queue: "mithril-h100-pool"
262+
# plugins:
263+
# - kubernetes:
264+
# podSpec:
265+
# containers:
266+
# - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
267+
# resources:
268+
# limits:
269+
# nvidia.com/gpu: 1
270+
# volumeMounts:
271+
# - name: devshm
272+
# mountPath: /dev/shm
273+
# - name: hf-cache
274+
# mountPath: /root/.cache/huggingface
275+
# env:
276+
# - name: HF_HOME
277+
# value: /root/.cache/huggingface
278+
# nodeSelector:
279+
# node.kubernetes.io/instance-type: gpu-h100-sxm
280+
# volumes:
281+
# - name: devshm
282+
# emptyDir:
283+
# medium: Memory
284+
# - name: hf-cache
285+
# hostPath:
286+
# path: /mnt/hf-cache
287+
# type: DirectoryOrCreate
202288

203-
- label: "Diffusion Image Edit Test with H100 (1 GPU)"
204-
timeout_in_minutes: 20
205-
depends_on: image-build
206-
commands:
207-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
208-
- pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py
209-
agents:
210-
queue: "mithril-h100-pool"
211-
plugins:
212-
- kubernetes:
213-
podSpec:
214-
containers:
215-
- image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
216-
resources:
217-
limits:
218-
nvidia.com/gpu: 1
219-
volumeMounts:
220-
- name: devshm
221-
mountPath: /dev/shm
222-
- name: hf-cache
223-
mountPath: /root/.cache/huggingface
224-
env:
225-
- name: HF_HOME
226-
value: /root/.cache/huggingface
227-
nodeSelector:
228-
node.kubernetes.io/instance-type: gpu-h100-sxm
229-
volumes:
230-
- name: devshm
231-
emptyDir:
232-
medium: Memory
233-
- name: hf-cache
234-
hostPath:
235-
path: /mnt/hf-cache
236-
type: DirectoryOrCreate
289+
# - label: "Bagel Text2Img Model Test with H100"
290+
# timeout_in_minutes: 30
291+
# depends_on: image-build
292+
# commands:
293+
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
294+
# - pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
295+
# agents:
296+
# queue: "mithril-h100-pool"
297+
# plugins:
298+
# - kubernetes:
299+
# podSpec:
300+
# containers:
301+
# - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
302+
# resources:
303+
# limits:
304+
# nvidia.com/gpu: 1
305+
# volumeMounts:
306+
# - name: devshm
307+
# mountPath: /dev/shm
308+
# - name: hf-cache
309+
# mountPath: /root/.cache/huggingface
310+
# env:
311+
# - name: HF_HOME
312+
# value: /root/.cache/huggingface
313+
# nodeSelector:
314+
# node.kubernetes.io/instance-type: gpu-h100-sxm
315+
# volumes:
316+
# - name: devshm
317+
# emptyDir:
318+
# medium: Memory
319+
# - name: hf-cache
320+
# hostPath:
321+
# path: /mnt/hf-cache
322+
# type: DirectoryOrCreate

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ if [[ $commands == *"--shard-id="* ]]; then
116116
--shm-size=16gb \
117117
--group-add "$render_gid" \
118118
--rm \
119+
-e MIOPEN_DEBUG_CONV_DIRECT=0 \
120+
-e MIOPEN_DEBUG_CONV_GEMM=0 \
121+
-e VLLM_ROCM_USE_AITER=1 \
119122
-e HIP_VISIBLE_DEVICES="${GPU}" \
120123
-e HF_TOKEN \
121124
-e AWS_ACCESS_KEY_ID \
@@ -148,6 +151,9 @@ else
148151
--shm-size=16gb \
149152
--group-add "$render_gid" \
150153
--rm \
154+
-e MIOPEN_DEBUG_CONV_DIRECT=0 \
155+
-e MIOPEN_DEBUG_CONV_GEMM=0 \
156+
-e VLLM_ROCM_USE_AITER=1 \
151157
-e HF_TOKEN \
152158
-e AWS_ACCESS_KEY_ID \
153159
-e AWS_SECRET_ACCESS_KEY \

0 commit comments

Comments
 (0)