Skip to content

Commit 783644e

Browse files
[ROCm][CI] Skip multi-GPU speculative decoding tests when insufficient GPUs available (vllm-project#30527)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
1 parent 197473c commit 783644e

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

tests/v1/e2e/test_spec_decode.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,16 @@
1616
MTP_SIMILARITY_RATE = 0.8
1717

1818

19+
def _skip_if_insufficient_gpus_for_tp(tp_size: int):
    """Skip the current test when ROCm exposes fewer GPUs than *tp_size*.

    On non-ROCm platforms this is a no-op; multi-GPU availability is
    assumed to be handled elsewhere there.
    """
    # Guard clause: only ROCm CI runners need this availability check.
    if not current_platform.is_rocm():
        return
    # torch.cuda maps to HIP devices on ROCm builds of PyTorch.
    available_gpus = torch.cuda.device_count()
    if available_gpus < tp_size:
        pytest.skip(
            f"Test requires {tp_size} GPUs, but only {available_gpus} available"
        )
27+
28+
1929
def get_test_prompts(mm_enabled: bool):
2030
prompt_types = ["repeat", "sentence"]
2131
if mm_enabled:
@@ -455,6 +465,8 @@ def test_eagle_correctness(
455465
m.setenv("VLLM_ROCM_USE_AITER", "1")
456466

457467
method, model_name, spec_model_name, tp_size = model_setup
468+
_skip_if_insufficient_gpus_for_tp(tp_size)
469+
458470
max_model_len = 2048
459471
max_num_batched_tokens = 128 if enable_chunked_prefill else max_model_len
460472

@@ -525,6 +537,7 @@ def test_mtp_correctness(
525537
m.setenv("VLLM_MLA_DISABLE", "1")
526538

527539
method, model_name, tp_size = model_setup
540+
_skip_if_insufficient_gpus_for_tp(tp_size)
528541

529542
ref_llm = LLM(
530543
model=model_name,

0 commit comments

Comments (0)