Skip to content

Commit 2ac1410

Browse files
authored
[CI] [ROCm] Add more AMD CI tests (vllm-project#1039)
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com>
1 parent d419011 commit 2ac1410

File tree

2 files changed

+49
-33
lines changed

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,9 @@ if [[ $commands == *"--shard-id="* ]]; then
116116
--shm-size=16gb \
117117
--group-add "$render_gid" \
118118
--rm \
119+
-e MIOPEN_DEBUG_CONV_DIRECT=0 \
120+
-e MIOPEN_DEBUG_CONV_GEMM=0 \
121+
-e VLLM_ROCM_USE_AITER=1 \
119122
-e HIP_VISIBLE_DEVICES="${GPU}" \
120123
-e HF_TOKEN \
121124
-e AWS_ACCESS_KEY_ID \
@@ -148,6 +151,9 @@ else
148151
--shm-size=16gb \
149152
--group-add "$render_gid" \
150153
--rm \
154+
-e MIOPEN_DEBUG_CONV_DIRECT=0 \
155+
-e MIOPEN_DEBUG_CONV_GEMM=0 \
156+
-e VLLM_ROCM_USE_AITER=1 \
151157
-e HF_TOKEN \
152158
-e AWS_ACCESS_KEY_ID \
153159
-e AWS_SECRET_ACCESS_KEY \

.buildkite/test-amd.yaml

Lines changed: 43 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -8,14 +8,32 @@ steps:
88
grade: Blocking
99
commands:
1010
- export GPU_ARCHS=gfx942
11-
- export MIOPEN_DEBUG_CONV_DIRECT=0
12-
- export MIOPEN_DEBUG_CONV_GEMM=0
13-
- export VLLM_ROCM_USE_AITER=1
14-
- export VLLM_ROCM_USE_AITER_MHA=1
15-
- export VLLM_ROCM_USE_AITER_LINEAR=0
16-
- export VLLM_ROCM_USE_AITER_RMSNORM=0
1711
- pytest -s -v tests/e2e/offline_inference/test_t2i_model.py
1812

13+
- label: "Diffusion Images API LoRA E2E"
14+
timeout_in_minutes: 20
15+
agent_pool: mi325_1
16+
depends_on: amd-build
17+
mirror_hardwares: [amdproduction]
18+
grade: Blocking
19+
commands:
20+
- export GPU_ARCHS=gfx942
21+
- export VLLM_LOGGING_LEVEL=DEBUG
22+
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
23+
- pytest -s -v tests/e2e/online_serving/test_images_generations_lora.py
24+
25+
- label: "Diffusion Model CPU offloading Test"
26+
timeout_in_minutes: 20
27+
agent_pool: mi325_1
28+
depends_on: amd-build
29+
mirror_hardwares: [amdproduction]
30+
grade: Blocking
31+
commands:
32+
- export GPU_ARCHS=gfx942
33+
- export VLLM_LOGGING_LEVEL=DEBUG
34+
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
35+
- pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
36+
1937
- label: "Diffusion Cache Backend Test"
2038
timeout_in_minutes: 15
2139
agent_pool: mi325_1
@@ -26,34 +44,37 @@ steps:
2644
- export GPU_ARCHS=gfx942
2745
- export VLLM_LOGGING_LEVEL=DEBUG
2846
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
29-
- export MIOPEN_DEBUG_CONV_DIRECT=0
30-
- export MIOPEN_DEBUG_CONV_GEMM=0
31-
- export VLLM_ROCM_USE_AITER=1
32-
- export VLLM_ROCM_USE_AITER_MHA=1
33-
- export VLLM_ROCM_USE_AITER_LINEAR=0
34-
- export VLLM_ROCM_USE_AITER_RMSNORM=0
3547
- pytest -s -v tests/e2e/offline_inference/test_cache_dit.py tests/e2e/offline_inference/test_teacache.py
3648

37-
- label: "Diffusion Parallelism Test"
38-
timeout_in_minutes: 15
49+
- label: "Diffusion Sequence Parallelism Test"
50+
timeout_in_minutes: 20
3951
agent_pool: mi325_2
4052
depends_on: amd-build
4153
mirror_hardwares: [amdproduction]
4254
grade: Blocking
4355
commands:
44-
- export MIOPEN_DEBUG_CONV_DIRECT=0
45-
- export MIOPEN_DEBUG_CONV_GEMM=0
56+
- export GPU_ARCHS=gfx942
57+
- export VLLM_LOGGING_LEVEL=DEBUG
58+
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
4659
- pytest -s -v tests/e2e/offline_inference/test_sequence_parallel.py
4760

61+
- label: "Diffusion Tensor Parallelism Test"
62+
timeout_in_minutes: 20
63+
agent_pool: mi325_2
64+
depends_on: amd-build
65+
commands:
66+
- export GPU_ARCHS=gfx942
67+
- export VLLM_LOGGING_LEVEL=DEBUG
68+
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
69+
- pytest -s -v tests/e2e/offline_inference/test_zimage_tensor_parallel.py
70+
4871
- label: "Diffusion GPU Worker Test"
4972
timeout_in_minutes: 20
5073
agent_pool: mi325_2
5174
depends_on: amd-build
5275
mirror_hardwares: [amdproduction]
5376
grade: Blocking
5477
commands:
55-
- export MIOPEN_DEBUG_CONV_DIRECT=0
56-
- export MIOPEN_DEBUG_CONV_GEMM=0
5778
- pytest -s -v tests/diffusion/test_diffusion_worker.py
5879

5980
- label: "Omni Model Test Qwen2-5-Omni"
@@ -66,12 +87,6 @@ steps:
6687
- export GPU_ARCHS=gfx942
6788
- export VLLM_LOGGING_LEVEL=DEBUG
6889
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
69-
- export MIOPEN_DEBUG_CONV_DIRECT=0
70-
- export MIOPEN_DEBUG_CONV_GEMM=0
71-
- export VLLM_ROCM_USE_AITER=1
72-
- export VLLM_ROCM_USE_AITER_MHA=1
73-
- export VLLM_ROCM_USE_AITER_LINEAR=0
74-
- export VLLM_ROCM_USE_AITER_RMSNORM=0
7590
- pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py
7691

7792
- label: "Omni Model Test Qwen3-Omni"
@@ -83,9 +98,10 @@ steps:
8398
commands:
8499
- export VLLM_LOGGING_LEVEL=DEBUG
85100
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
86-
- export MIOPEN_DEBUG_CONV_DIRECT=0
87-
- export MIOPEN_DEBUG_CONV_GEMM=0
88-
- pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py
101+
- export VLLM_TEST_CLEAN_GPU_MEMORY="1"
102+
- pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py
103+
- pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py
104+
- pytest -s -v tests/e2e/online_serving/test_async_omni.py
89105

90106
- label: "Diffusion Image Edit Test"
91107
timeout_in_minutes: 15
@@ -97,10 +113,4 @@ steps:
97113
- export GPU_ARCHS=gfx942
98114
- export VLLM_LOGGING_LEVEL=DEBUG
99115
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
100-
- export MIOPEN_DEBUG_CONV_DIRECT=0
101-
- export MIOPEN_DEBUG_CONV_GEMM=0
102-
- export VLLM_ROCM_USE_AITER=1
103-
- export VLLM_ROCM_USE_AITER_MHA=1
104-
- export VLLM_ROCM_USE_AITER_LINEAR=0
105-
- export VLLM_ROCM_USE_AITER_RMSNORM=0
106116
- pytest -s -v tests/e2e/online_serving/test_i2i_multi_image_input.py

0 commit comments

Comments (0)