99 agents :
1010 queue : " cpu_queue_premerge"
1111
12- # - label: "Test on NPU "
13- # depends_on: ~
14- # key: npu-test
15- # commands:
16- # - " .buildkite/scripts/hardware_ci/run_npu_test.sh"
17- # agents:
18- # queue: "ascend "
12+ - label : " Upload Nightly Pipeline "
13+ depends_on : image-build
14+ if : build.env("NIGHTLY") == "1"
15+ commands :
16+ - buildkite-agent pipeline upload .buildkite/test-nightly.yaml
17+ agents :
18+ queue : " cpu_queue_premerge "
1919
2020 - label : " Simple Unit Test"
21- depends_on : ~
21+ depends_on : image-build
2222 commands :
23- - " .buildkite/scripts/simple_test.sh "
23+ - " pytest -v -s -m 'core_model and cpu' --cov=vllm_omni --cov-branch --cov-report=term-missing --cov-report=html --cov-report=xml "
2424 agents :
25- queue : " cpu_queue_premerge"
25+ queue : " gpu_1_queue"
26+ plugins :
27+ - docker#v5.2.0:
28+ image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
29+ always-pull : true
30+ propagate-environment : true
31+ environment :
32+ - " HF_HOME=/fsx/hf_cache"
33+ volumes :
34+ - " /fsx/hf_cache:/fsx/hf_cache"
2635
2736 - label : " Diffusion Model Test"
2837 timeout_in_minutes : 20
2938 depends_on : image-build
3039 commands :
31- - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py
40+ - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py -m "core_model and diffusion" --run-level "core_model"
3241 agents :
3342 queue : " gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
3443 plugins :
4655 depends_on : image-build
4756 commands :
4857 - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
58+ - pytest -s -v tests/e2e/offline_inference/test_diffusion_layerwise_offload.py
4959 agents :
5060 queue : " gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
5161 plugins :
7989 timeout_in_minutes : 15
8090 depends_on : image-build
8191 commands :
82- - pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py
92+ - pytest -s -v -m 'core_model and cache and diffusion and not distributed_cuda and L4'
8393 agents :
8494 queue : " gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
8595 plugins :
@@ -96,7 +106,7 @@ steps:
96106 timeout_in_minutes : 20
97107 depends_on : image-build
98108 commands :
99- - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py
109+ - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py -m core_model
100110 agents :
101111 queue : " gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
102112 plugins :
@@ -110,11 +120,11 @@ steps:
110120 volumes :
111121 - " /fsx/hf_cache:/fsx/hf_cache"
112122
113- - label : " Diffusion Tensor Parallelism Test"
123+ - label : " Diffusion GPU Worker Test"
114124 timeout_in_minutes : 20
115125 depends_on : image-build
116126 commands :
117- - pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py
127+ - pytest -s -v tests/diffusion/test_diffusion_worker.py
118128 agents :
119129 queue : " gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
120130 plugins :
@@ -128,109 +138,185 @@ steps:
128138 volumes :
129139 - " /fsx/hf_cache:/fsx/hf_cache"
130140
131- - label : " Diffusion GPU Worker Test"
141+
142+ # - label: "Benchmark&Engine Test"
143+ # timeout_in_minutes: 15
144+ # depends_on: image-build
145+ # commands:
146+ # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
147+ # - pytest -s -v tests/benchmarks/test_serve_cli.py
148+ # - pytest -s -v tests/engine/test_async_omni_engine_abort.py
149+ # agents:
150+ # queue: "mithril-h100-pool"
151+ # plugins:
152+ # - kubernetes:
153+ # podSpec:
154+ # containers:
155+ # - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
156+ # resources:
157+ # limits:
158+ # nvidia.com/gpu: 2
159+ # volumeMounts:
160+ # - name: devshm
161+ # mountPath: /dev/shm
162+ # - name: hf-cache
163+ # mountPath: /root/.cache/huggingface
164+ # env:
165+ # - name: HF_HOME
166+ # value: /root/.cache/huggingface
167+ # nodeSelector:
168+ # node.kubernetes.io/instance-type: gpu-h100-sxm
169+ # volumes:
170+ # - name: devshm
171+ # emptyDir:
172+ # medium: Memory
173+ # - name: hf-cache
174+ # hostPath:
175+ # path: /mnt/hf-cache
176+ # type: DirectoryOrCreate
177+
178+ - label : " Omni Model Test"
132179 timeout_in_minutes : 20
133180 depends_on : image-build
134181 commands :
135- - pytest -s -v tests/diffusion/test_gpu_diffusion_worker.py
182+ - export VLLM_LOGGING_LEVEL=DEBUG
183+ - export VLLM_WORKER_MULTIPROC_METHOD=spawn
184+ - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
136185 agents :
137186 queue : " gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
138187 plugins :
139188 - docker#v5.2.0:
140189 image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
141190 always-pull : true
142191 propagate-environment : true
143- shm-size : " 8gb"
144192 environment :
145193 - " HF_HOME=/fsx/hf_cache"
146194 volumes :
147195 - " /fsx/hf_cache:/fsx/hf_cache"
148196
149- - label : " Omni Model Test"
150- timeout_in_minutes : 15
197+ # - label: "Omni Model Test with H100"
198+ # timeout_in_minutes: 20
199+ # depends_on: image-build
200+ # commands:
201+ # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
202+ # - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
203+ # - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
204+ # - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py -m "core_model" --run-level "core_model"
205+ # agents:
206+ # queue: "mithril-h100-pool"
207+ # plugins:
208+ # - kubernetes:
209+ # podSpec:
210+ # containers:
211+ # - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
212+ # resources:
213+ # limits:
214+ # nvidia.com/gpu: 2
215+ # volumeMounts:
216+ # - name: devshm
217+ # mountPath: /dev/shm
218+ # - name: hf-cache
219+ # mountPath: /root/.cache/huggingface
220+ # env:
221+ # - name: HF_HOME
222+ # value: /root/.cache/huggingface
223+ # nodeSelector:
224+ # node.kubernetes.io/instance-type: gpu-h100-sxm
225+ # volumes:
226+ # - name: devshm
227+ # emptyDir:
228+ # medium: Memory
229+ # - name: hf-cache
230+ # hostPath:
231+ # path: /mnt/hf-cache
232+ # type: DirectoryOrCreate
233+
234+ - label : " Qwen3-TTS E2E Test"
235+ timeout_in_minutes : 20
151236 depends_on : image-build
152237 commands :
153238 - export VLLM_LOGGING_LEVEL=DEBUG
154239 - export VLLM_WORKER_MULTIPROC_METHOD=spawn
155- - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
240+ - pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py
156241 agents :
157- queue : " gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
242+ queue : " gpu_4_queue"
158243 plugins :
159244 - docker#v5.2.0:
160245 image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
161246 always-pull : true
162247 propagate-environment : true
248+ shm-size : " 8gb"
163249 environment :
164250 - " HF_HOME=/fsx/hf_cache"
165251 volumes :
166252 - " /fsx/hf_cache:/fsx/hf_cache"
167253
168- - label : " Omni Model Test with H100"
169- timeout_in_minutes : 30
170- depends_on : image-build
171- commands :
172- - export VLLM_WORKER_MULTIPROC_METHOD=spawn
173- - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py tests/e2e/online_serving/test_async_omni.py
174- agents :
175- queue : " mithril-h100-pool"
176- plugins :
177- - kubernetes :
178- podSpec :
179- containers :
180- - image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
181- resources :
182- limits :
183- nvidia.com/gpu : 2
184- volumeMounts :
185- - name : devshm
186- mountPath : /dev/shm
187- - name : hf-cache
188- mountPath : /root/.cache/huggingface
189- env :
190- - name : HF_HOME
191- value : /root/.cache/huggingface
192- nodeSelector :
193- node.kubernetes.io/instance-type : gpu-h100-sxm
194- volumes :
195- - name : devshm
196- emptyDir :
197- medium : Memory
198- - name : hf-cache
199- hostPath :
200- path : /mnt/hf-cache
201- type : DirectoryOrCreate
254+ # - label: "Diffusion Image Edit Test with H100 (1 GPU) "
255+ # timeout_in_minutes: 20
256+ # depends_on: image-build
257+ # commands:
258+ # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
259+ # - pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py
260+ # agents:
261+ # queue: "mithril-h100-pool"
262+ # plugins:
263+ # - kubernetes:
264+ # podSpec:
265+ # containers:
266+ # - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
267+ # resources:
268+ # limits:
269+ # nvidia.com/gpu: 1
270+ # volumeMounts:
271+ # - name: devshm
272+ # mountPath: /dev/shm
273+ # - name: hf-cache
274+ # mountPath: /root/.cache/huggingface
275+ # env:
276+ # - name: HF_HOME
277+ # value: /root/.cache/huggingface
278+ # nodeSelector:
279+ # node.kubernetes.io/instance-type: gpu-h100-sxm
280+ # volumes:
281+ # - name: devshm
282+ # emptyDir:
283+ # medium: Memory
284+ # - name: hf-cache
285+ # hostPath:
286+ # path: /mnt/hf-cache
287+ # type: DirectoryOrCreate
202288
203- - label : " Diffusion Image Edit Test with H100 (1 GPU) "
204- timeout_in_minutes : 20
205- depends_on : image-build
206- commands :
207- - export VLLM_WORKER_MULTIPROC_METHOD=spawn
208- - pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py
209- agents :
210- queue : " mithril-h100-pool"
211- plugins :
212- - kubernetes :
213- podSpec :
214- containers :
215- - image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
216- resources :
217- limits :
218- nvidia.com/gpu : 1
219- volumeMounts :
220- - name : devshm
221- mountPath : /dev/shm
222- - name : hf-cache
223- mountPath : /root/.cache/huggingface
224- env :
225- - name : HF_HOME
226- value : /root/.cache/huggingface
227- nodeSelector :
228- node.kubernetes.io/instance-type : gpu-h100-sxm
229- volumes :
230- - name : devshm
231- emptyDir :
232- medium : Memory
233- - name : hf-cache
234- hostPath :
235- path : /mnt/hf-cache
236- type : DirectoryOrCreate
289+ # - label: "Bagel Text2Img Model Test with H100"
290+ # timeout_in_minutes: 30
291+ # depends_on: image-build
292+ # commands:
293+ # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
294+ # - pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
295+ # agents:
296+ # queue: "mithril-h100-pool"
297+ # plugins:
298+ # - kubernetes:
299+ # podSpec:
300+ # containers:
301+ # - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
302+ # resources:
303+ # limits:
304+ # nvidia.com/gpu: 1
305+ # volumeMounts:
306+ # - name: devshm
307+ # mountPath: /dev/shm
308+ # - name: hf-cache
309+ # mountPath: /root/.cache/huggingface
310+ # env:
311+ # - name: HF_HOME
312+ # value: /root/.cache/huggingface
313+ # nodeSelector:
314+ # node.kubernetes.io/instance-type: gpu-h100-sxm
315+ # volumes:
316+ # - name: devshm
317+ # emptyDir:
318+ # medium: Memory
319+ # - name: hf-cache
320+ # hostPath:
321+ # path: /mnt/hf-cache
322+ # type: DirectoryOrCreate
0 commit comments