Skip to content

Commit 4aa67aa

Browse files
Merge branch 'main' into feat/vllmomni_profiling
2 parents 81cf493 + 4de077e commit 4aa67aa

File tree

55 files changed

+6784
-916
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

55 files changed

+6784
-916
lines changed

.buildkite/pipeline.yml

Lines changed: 17 additions & 299 deletions
Original file line numberDiff line numberDiff line change
@@ -9,314 +9,32 @@ steps:
99
agents:
1010
queue: "cpu_queue_premerge"
1111

12-
- label: "Upload Nightly Pipeline"
12+
# L2 Test
13+
- label: "Upload Ready Pipeline"
1314
depends_on: image-build
14-
if: build.env("NIGHTLY") == "1"
15+
key: upload-ready-pipeline
16+
if: build.branch != "main"
1517
commands:
16-
- buildkite-agent pipeline upload .buildkite/test-nightly.yaml
18+
- buildkite-agent pipeline upload .buildkite/test-ready.yml
1719
agents:
1820
queue: "cpu_queue_premerge"
1921

20-
- label: "Simple Unit Test"
21-
depends_on: image-build
22-
commands:
23-
- "pytest -v -s -m 'core_model and cpu' --cov=vllm_omni --cov-branch --cov-report=term-missing --cov-report=html --cov-report=xml"
24-
agents:
25-
queue: "gpu_1_queue"
26-
plugins:
27-
- docker#v5.2.0:
28-
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
29-
always-pull: true
30-
propagate-environment: true
31-
environment:
32-
- "HF_HOME=/fsx/hf_cache"
33-
volumes:
34-
- "/fsx/hf_cache:/fsx/hf_cache"
35-
36-
- label: "Diffusion Model Test"
37-
timeout_in_minutes: 20
38-
depends_on: image-build
39-
commands:
40-
- pytest -s -v tests/e2e/offline_inference/test_t2i_model.py -m "core_model and diffusion" --run-level "core_model"
41-
agents:
42-
queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
43-
plugins:
44-
- docker#v5.2.0:
45-
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
46-
always-pull: true
47-
propagate-environment: true
48-
environment:
49-
- "HF_HOME=/fsx/hf_cache"
50-
volumes:
51-
- "/fsx/hf_cache:/fsx/hf_cache"
52-
53-
- label: "Diffusion Model CPU offloading Test"
54-
timeout_in_minutes: 20
22+
# L3 Test
23+
- label: "Upload Merge Pipeline"
5524
depends_on: image-build
25+
key: upload-merge-pipeline
26+
if: build.branch == "main"
5627
commands:
57-
- pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
58-
- pytest -s -v tests/e2e/offline_inference/test_diffusion_layerwise_offload.py
28+
- buildkite-agent pipeline upload .buildkite/test-merge.yml
5929
agents:
60-
queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
61-
plugins:
62-
- docker#v5.2.0:
63-
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
64-
always-pull: true
65-
propagate-environment: true
66-
environment:
67-
- "HF_HOME=/fsx/hf_cache"
68-
volumes:
69-
- "/fsx/hf_cache:/fsx/hf_cache"
70-
71-
- label: "Audio Generation Model Test"
72-
timeout_in_minutes: 20
73-
depends_on: image-build
74-
commands:
75-
- pytest -s -v tests/e2e/offline_inference/test_stable_audio_model.py
76-
agents:
77-
queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
78-
plugins:
79-
- docker#v5.2.0:
80-
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
81-
always-pull: true
82-
propagate-environment: true
83-
environment:
84-
- "HF_HOME=/fsx/hf_cache"
85-
volumes:
86-
- "/fsx/hf_cache:/fsx/hf_cache"
87-
88-
- label: "Diffusion Cache Backend Test"
89-
timeout_in_minutes: 15
90-
depends_on: image-build
91-
commands:
92-
- pytest -s -v -m 'core_model and cache and diffusion and not distributed_cuda and L4'
93-
agents:
94-
queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
95-
plugins:
96-
- docker#v5.2.0:
97-
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
98-
always-pull: true
99-
propagate-environment: true
100-
environment:
101-
- "HF_HOME=/fsx/hf_cache"
102-
volumes:
103-
- "/fsx/hf_cache:/fsx/hf_cache"
104-
105-
- label: "Diffusion Sequence Parallelism Test"
106-
timeout_in_minutes: 20
107-
depends_on: image-build
108-
commands:
109-
- pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py -m core_model
110-
agents:
111-
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
112-
plugins:
113-
- docker#v5.2.0:
114-
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
115-
always-pull: true
116-
propagate-environment: true
117-
shm-size: "8gb"
118-
environment:
119-
- "HF_HOME=/fsx/hf_cache"
120-
volumes:
121-
- "/fsx/hf_cache:/fsx/hf_cache"
122-
123-
- label: "Diffusion GPU Worker Test"
124-
timeout_in_minutes: 20
125-
depends_on: image-build
126-
commands:
127-
- pytest -s -v tests/diffusion/test_diffusion_worker.py
128-
agents:
129-
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
130-
plugins:
131-
- docker#v5.2.0:
132-
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
133-
always-pull: true
134-
propagate-environment: true
135-
shm-size: "8gb"
136-
environment:
137-
- "HF_HOME=/fsx/hf_cache"
138-
volumes:
139-
- "/fsx/hf_cache:/fsx/hf_cache"
140-
141-
142-
# - label: "Benchmark&Engine Test"
143-
# timeout_in_minutes: 15
144-
# depends_on: image-build
145-
# commands:
146-
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
147-
# - pytest -s -v tests/benchmarks/test_serve_cli.py
148-
# - pytest -s -v tests/engine/test_async_omni_engine_abort.py
149-
# agents:
150-
# queue: "mithril-h100-pool"
151-
# plugins:
152-
# - kubernetes:
153-
# podSpec:
154-
# containers:
155-
# - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
156-
# resources:
157-
# limits:
158-
# nvidia.com/gpu: 2
159-
# volumeMounts:
160-
# - name: devshm
161-
# mountPath: /dev/shm
162-
# - name: hf-cache
163-
# mountPath: /root/.cache/huggingface
164-
# env:
165-
# - name: HF_HOME
166-
# value: /root/.cache/huggingface
167-
# nodeSelector:
168-
# node.kubernetes.io/instance-type: gpu-h100-sxm
169-
# volumes:
170-
# - name: devshm
171-
# emptyDir:
172-
# medium: Memory
173-
# - name: hf-cache
174-
# hostPath:
175-
# path: /mnt/hf-cache
176-
# type: DirectoryOrCreate
177-
178-
- label: "Omni Model Test"
179-
timeout_in_minutes: 20
180-
depends_on: image-build
181-
commands:
182-
- export VLLM_LOGGING_LEVEL=DEBUG
183-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
184-
- pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
185-
agents:
186-
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
187-
plugins:
188-
- docker#v5.2.0:
189-
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
190-
always-pull: true
191-
propagate-environment: true
192-
environment:
193-
- "HF_HOME=/fsx/hf_cache"
194-
volumes:
195-
- "/fsx/hf_cache:/fsx/hf_cache"
196-
197-
# - label: "Omni Model Test with H100"
198-
# timeout_in_minutes: 20
199-
# depends_on: image-build
200-
# commands:
201-
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
202-
# - export VLLM_TEST_CLEAN_GPU_MEMORY="1"
203-
# - pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
204-
# - pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py -m "core_model" --run-level "core_model"
205-
# agents:
206-
# queue: "mithril-h100-pool"
207-
# plugins:
208-
# - kubernetes:
209-
# podSpec:
210-
# containers:
211-
# - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
212-
# resources:
213-
# limits:
214-
# nvidia.com/gpu: 2
215-
# volumeMounts:
216-
# - name: devshm
217-
# mountPath: /dev/shm
218-
# - name: hf-cache
219-
# mountPath: /root/.cache/huggingface
220-
# env:
221-
# - name: HF_HOME
222-
# value: /root/.cache/huggingface
223-
# nodeSelector:
224-
# node.kubernetes.io/instance-type: gpu-h100-sxm
225-
# volumes:
226-
# - name: devshm
227-
# emptyDir:
228-
# medium: Memory
229-
# - name: hf-cache
230-
# hostPath:
231-
# path: /mnt/hf-cache
232-
# type: DirectoryOrCreate
30+
queue: "cpu_queue_premerge"
23331

234-
- label: "Qwen3-TTS E2E Test"
235-
timeout_in_minutes: 20
32+
# L4 Test
33+
- label: "Upload Nightly Pipeline"
23634
depends_on: image-build
35+
key: upload-nightly-pipeline
36+
if: build.env("NIGHTLY") == "1"
23737
commands:
238-
- export VLLM_LOGGING_LEVEL=DEBUG
239-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
240-
- pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py
38+
- buildkite-agent pipeline upload .buildkite/test-nightly.yml
24139
agents:
242-
queue: "gpu_4_queue"
243-
plugins:
244-
- docker#v5.2.0:
245-
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
246-
always-pull: true
247-
propagate-environment: true
248-
shm-size: "8gb"
249-
environment:
250-
- "HF_HOME=/fsx/hf_cache"
251-
volumes:
252-
- "/fsx/hf_cache:/fsx/hf_cache"
253-
254-
# - label: "Diffusion Image Edit Test with H100 (1 GPU)"
255-
# timeout_in_minutes: 20
256-
# depends_on: image-build
257-
# commands:
258-
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
259-
# - pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py
260-
# agents:
261-
# queue: "mithril-h100-pool"
262-
# plugins:
263-
# - kubernetes:
264-
# podSpec:
265-
# containers:
266-
# - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
267-
# resources:
268-
# limits:
269-
# nvidia.com/gpu: 1
270-
# volumeMounts:
271-
# - name: devshm
272-
# mountPath: /dev/shm
273-
# - name: hf-cache
274-
# mountPath: /root/.cache/huggingface
275-
# env:
276-
# - name: HF_HOME
277-
# value: /root/.cache/huggingface
278-
# nodeSelector:
279-
# node.kubernetes.io/instance-type: gpu-h100-sxm
280-
# volumes:
281-
# - name: devshm
282-
# emptyDir:
283-
# medium: Memory
284-
# - name: hf-cache
285-
# hostPath:
286-
# path: /mnt/hf-cache
287-
# type: DirectoryOrCreate
288-
289-
# - label: "Bagel Text2Img Model Test with H100"
290-
# timeout_in_minutes: 30
291-
# depends_on: image-build
292-
# commands:
293-
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
294-
# - pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
295-
# agents:
296-
# queue: "mithril-h100-pool"
297-
# plugins:
298-
# - kubernetes:
299-
# podSpec:
300-
# containers:
301-
# - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
302-
# resources:
303-
# limits:
304-
# nvidia.com/gpu: 1
305-
# volumeMounts:
306-
# - name: devshm
307-
# mountPath: /dev/shm
308-
# - name: hf-cache
309-
# mountPath: /root/.cache/huggingface
310-
# env:
311-
# - name: HF_HOME
312-
# value: /root/.cache/huggingface
313-
# nodeSelector:
314-
# node.kubernetes.io/instance-type: gpu-h100-sxm
315-
# volumes:
316-
# - name: devshm
317-
# emptyDir:
318-
# medium: Memory
319-
# - name: hf-cache
320-
# hostPath:
321-
# path: /mnt/hf-cache
322-
# type: DirectoryOrCreate
40+
queue: "cpu_queue_premerge"

0 commit comments

Comments
 (0)