@@ -9,314 +9,32 @@ steps:
99 agents :
1010 queue : " cpu_queue_premerge"
1111
12- - label : " Upload Nightly Pipeline"
12+ # L2 Test
13+ - label : " Upload Ready Pipeline"
1314 depends_on : image-build
14- if : build.env("NIGHTLY") == "1"
15+ key : upload-ready-pipeline
16+ if : build.branch != "main"
1517 commands :
16- - buildkite-agent pipeline upload .buildkite/test-nightly.yaml
18+ - buildkite-agent pipeline upload .buildkite/test-ready.yml
1719 agents :
1820 queue : " cpu_queue_premerge"
1921
20- - label : " Simple Unit Test"
21- depends_on : image-build
22- commands :
23- - " pytest -v -s -m 'core_model and cpu' --cov=vllm_omni --cov-branch --cov-report=term-missing --cov-report=html --cov-report=xml"
24- agents :
25- queue : " gpu_1_queue"
26- plugins :
27- - docker#v5.2.0:
28- image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
29- always-pull : true
30- propagate-environment : true
31- environment :
32- - " HF_HOME=/fsx/hf_cache"
33- volumes :
34- - " /fsx/hf_cache:/fsx/hf_cache"
35-
36- - label : " Diffusion Model Test"
37- timeout_in_minutes : 20
38- depends_on : image-build
39- commands :
40- - pytest -s -v tests/e2e/offline_inference/test_t2i_model.py -m "core_model and diffusion" --run-level "core_model"
41- agents :
42- queue : " gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
43- plugins :
44- - docker#v5.2.0:
45- image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
46- always-pull : true
47- propagate-environment : true
48- environment :
49- - " HF_HOME=/fsx/hf_cache"
50- volumes :
51- - " /fsx/hf_cache:/fsx/hf_cache"
52-
53- - label : " Diffusion Model CPU offloading Test"
54- timeout_in_minutes : 20
22+ # L3 Test
23+ - label : " Upload Merge Pipeline"
5524 depends_on : image-build
25+ key : upload-merge-pipeline
26+ if : build.branch == "main"
5627 commands :
57- - pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
58- - pytest -s -v tests/e2e/offline_inference/test_diffusion_layerwise_offload.py
28+ - buildkite-agent pipeline upload .buildkite/test-merge.yml
5929 agents :
60- queue : " gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
61- plugins :
62- - docker#v5.2.0:
63- image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
64- always-pull : true
65- propagate-environment : true
66- environment :
67- - " HF_HOME=/fsx/hf_cache"
68- volumes :
69- - " /fsx/hf_cache:/fsx/hf_cache"
70-
71- - label : " Audio Generation Model Test"
72- timeout_in_minutes : 20
73- depends_on : image-build
74- commands :
75- - pytest -s -v tests/e2e/offline_inference/test_stable_audio_model.py
76- agents :
77- queue : " gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
78- plugins :
79- - docker#v5.2.0:
80- image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
81- always-pull : true
82- propagate-environment : true
83- environment :
84- - " HF_HOME=/fsx/hf_cache"
85- volumes :
86- - " /fsx/hf_cache:/fsx/hf_cache"
87-
88- - label : " Diffusion Cache Backend Test"
89- timeout_in_minutes : 15
90- depends_on : image-build
91- commands :
92- - pytest -s -v -m 'core_model and cache and diffusion and not distributed_cuda and L4'
93- agents :
94- queue : " gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
95- plugins :
96- - docker#v5.2.0:
97- image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
98- always-pull : true
99- propagate-environment : true
100- environment :
101- - " HF_HOME=/fsx/hf_cache"
102- volumes :
103- - " /fsx/hf_cache:/fsx/hf_cache"
104-
105- - label : " Diffusion Sequence Parallelism Test"
106- timeout_in_minutes : 20
107- depends_on : image-build
108- commands :
109- - pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py -m core_model
110- agents :
111- queue : " gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
112- plugins :
113- - docker#v5.2.0:
114- image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
115- always-pull : true
116- propagate-environment : true
117- shm-size : " 8gb"
118- environment :
119- - " HF_HOME=/fsx/hf_cache"
120- volumes :
121- - " /fsx/hf_cache:/fsx/hf_cache"
122-
123- - label : " Diffusion GPU Worker Test"
124- timeout_in_minutes : 20
125- depends_on : image-build
126- commands :
127- - pytest -s -v tests/diffusion/test_diffusion_worker.py
128- agents :
129- queue : " gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
130- plugins :
131- - docker#v5.2.0:
132- image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
133- always-pull : true
134- propagate-environment : true
135- shm-size : " 8gb"
136- environment :
137- - " HF_HOME=/fsx/hf_cache"
138- volumes :
139- - " /fsx/hf_cache:/fsx/hf_cache"
140-
141-
142- # - label: "Benchmark&Engine Test"
143- # timeout_in_minutes: 15
144- # depends_on: image-build
145- # commands:
146- # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
147- # - pytest -s -v tests/benchmarks/test_serve_cli.py
148- # - pytest -s -v tests/engine/test_async_omni_engine_abort.py
149- # agents:
150- # queue: "mithril-h100-pool"
151- # plugins:
152- # - kubernetes:
153- # podSpec:
154- # containers:
155- # - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
156- # resources:
157- # limits:
158- # nvidia.com/gpu: 2
159- # volumeMounts:
160- # - name: devshm
161- # mountPath: /dev/shm
162- # - name: hf-cache
163- # mountPath: /root/.cache/huggingface
164- # env:
165- # - name: HF_HOME
166- # value: /root/.cache/huggingface
167- # nodeSelector:
168- # node.kubernetes.io/instance-type: gpu-h100-sxm
169- # volumes:
170- # - name: devshm
171- # emptyDir:
172- # medium: Memory
173- # - name: hf-cache
174- # hostPath:
175- # path: /mnt/hf-cache
176- # type: DirectoryOrCreate
177-
178- - label : " Omni Model Test"
179- timeout_in_minutes : 20
180- depends_on : image-build
181- commands :
182- - export VLLM_LOGGING_LEVEL=DEBUG
183- - export VLLM_WORKER_MULTIPROC_METHOD=spawn
184- - pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
185- agents :
186- queue : " gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
187- plugins :
188- - docker#v5.2.0:
189- image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
190- always-pull : true
191- propagate-environment : true
192- environment :
193- - " HF_HOME=/fsx/hf_cache"
194- volumes :
195- - " /fsx/hf_cache:/fsx/hf_cache"
196-
197- # - label: "Omni Model Test with H100"
198- # timeout_in_minutes: 20
199- # depends_on: image-build
200- # commands:
201- # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
202- # - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
203- # - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
204- # - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py -m "core_model" --run-level "core_model"
205- # agents:
206- # queue: "mithril-h100-pool"
207- # plugins:
208- # - kubernetes:
209- # podSpec:
210- # containers:
211- # - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
212- # resources:
213- # limits:
214- # nvidia.com/gpu: 2
215- # volumeMounts:
216- # - name: devshm
217- # mountPath: /dev/shm
218- # - name: hf-cache
219- # mountPath: /root/.cache/huggingface
220- # env:
221- # - name: HF_HOME
222- # value: /root/.cache/huggingface
223- # nodeSelector:
224- # node.kubernetes.io/instance-type: gpu-h100-sxm
225- # volumes:
226- # - name: devshm
227- # emptyDir:
228- # medium: Memory
229- # - name: hf-cache
230- # hostPath:
231- # path: /mnt/hf-cache
232- # type: DirectoryOrCreate
30+ queue : " cpu_queue_premerge"
23331
234- - label : " Qwen3-TTS E2E Test"
235- timeout_in_minutes : 20
32+ # L4 Test
33+ - label : " Upload Nightly Pipeline "
23634 depends_on : image-build
35+ key : upload-nightly-pipeline
36+ if : build.env("NIGHTLY") == "1"
23737 commands :
238- - export VLLM_LOGGING_LEVEL=DEBUG
239- - export VLLM_WORKER_MULTIPROC_METHOD=spawn
240- - pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py
38+ - buildkite-agent pipeline upload .buildkite/test-nightly.yml
24139 agents :
242- queue : " gpu_4_queue"
243- plugins :
244- - docker#v5.2.0:
245- image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
246- always-pull : true
247- propagate-environment : true
248- shm-size : " 8gb"
249- environment :
250- - " HF_HOME=/fsx/hf_cache"
251- volumes :
252- - " /fsx/hf_cache:/fsx/hf_cache"
253-
254- # - label: "Diffusion Image Edit Test with H100 (1 GPU)"
255- # timeout_in_minutes: 20
256- # depends_on: image-build
257- # commands:
258- # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
259- # - pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py
260- # agents:
261- # queue: "mithril-h100-pool"
262- # plugins:
263- # - kubernetes:
264- # podSpec:
265- # containers:
266- # - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
267- # resources:
268- # limits:
269- # nvidia.com/gpu: 1
270- # volumeMounts:
271- # - name: devshm
272- # mountPath: /dev/shm
273- # - name: hf-cache
274- # mountPath: /root/.cache/huggingface
275- # env:
276- # - name: HF_HOME
277- # value: /root/.cache/huggingface
278- # nodeSelector:
279- # node.kubernetes.io/instance-type: gpu-h100-sxm
280- # volumes:
281- # - name: devshm
282- # emptyDir:
283- # medium: Memory
284- # - name: hf-cache
285- # hostPath:
286- # path: /mnt/hf-cache
287- # type: DirectoryOrCreate
288-
289- # - label: "Bagel Text2Img Model Test with H100"
290- # timeout_in_minutes: 30
291- # depends_on: image-build
292- # commands:
293- # - export VLLM_WORKER_MULTIPROC_METHOD=spawn
294- # - pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
295- # agents:
296- # queue: "mithril-h100-pool"
297- # plugins:
298- # - kubernetes:
299- # podSpec:
300- # containers:
301- # - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
302- # resources:
303- # limits:
304- # nvidia.com/gpu: 1
305- # volumeMounts:
306- # - name: devshm
307- # mountPath: /dev/shm
308- # - name: hf-cache
309- # mountPath: /root/.cache/huggingface
310- # env:
311- # - name: HF_HOME
312- # value: /root/.cache/huggingface
313- # nodeSelector:
314- # node.kubernetes.io/instance-type: gpu-h100-sxm
315- # volumes:
316- # - name: devshm
317- # emptyDir:
318- # medium: Memory
319- # - name: hf-cache
320- # hostPath:
321- # path: /mnt/hf-cache
322- # type: DirectoryOrCreate
40+ queue : " cpu_queue_premerge"
0 commit comments