Skip to content

Commit 9571e99

Browse files
[ROCm][CI] Extending attention backend coverage for Eagle spec decode tests (vllm-project#35265)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
1 parent c97234c commit 9571e99

File tree

4 files changed

+314
-150
lines changed

4 files changed

+314
-150
lines changed

.buildkite/test_areas/engine.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ steps:
3030
- pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py
3131
mirror:
3232
amd:
33-
device: mi325_8
33+
device: mi325_1
3434
depends_on:
3535
- image-build-amd
3636
commands:

tests/utils.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1327,6 +1327,57 @@ def wrapper(f: Callable[_P, None]) -> Callable[_P, None]:
13271327
return wrapper
13281328

13291329

1330+
def gpu_tier_mark(*, min_gpus: int = 1, max_gpus: int | None = None):
    """
    Build pytest marks restricting a test to a GPU-count range.

    Returns a *list of marks*, not a decorator: a ``distributed`` mark when
    ``min_gpus > 1``, plus an unconditional ``skip`` mark when the detected
    GPU count falls outside ``[min_gpus, max_gpus]``. Apply the marks
    explicitly — via the ``single_gpu_only`` / ``multi_gpu_only`` decorator
    helpers, or directly::

        pytestmark = gpu_tier_mark(min_gpus=2)          # module-wide
        @pytest.mark.parametrize(..., marks=gpu_tier_mark(max_gpus=1))

    Args:
        min_gpus: minimum number of GPUs the test requires.
        max_gpus: maximum number of GPUs allowed; ``None`` means no upper
            bound.

    Returns:
        list: pytest marks to apply to the test item.
    """
    gpu_count = cuda_device_count_stateless()
    marks = []

    # Tag genuinely multi-GPU tests so CI can route them to distributed
    # runners regardless of whether they end up skipped locally.
    if min_gpus > 1:
        marks.append(pytest.mark.distributed(num_gpus=min_gpus))

    reasons = []
    if gpu_count < min_gpus:
        reasons.append(f"Need at least {min_gpus} GPUs (have {gpu_count})")
    if max_gpus is not None and gpu_count > max_gpus:
        reasons.append(f"Need at most {max_gpus} GPUs (have {gpu_count})")

    if reasons:
        # The condition is already decided; plain `skip` is the idiomatic
        # form of `skipif(True, ...)`.
        marks.append(pytest.mark.skip(reason="; ".join(reasons)))

    return marks
1355+
1356+
1357+
def single_gpu_only(f=None):
    """Skip this test when running in a multi-GPU environment.

    Works both bare (``@single_gpu_only``) and called
    (``@single_gpu_only()``).
    """
    tier_marks = gpu_tier_mark(max_gpus=1)

    def decorate(func):
        # Apply in reverse so the marks stack in declaration order.
        for m in reversed(tier_marks):
            func = m(func)
        return func

    if f is None:
        return decorate
    return decorate(f)
1367+
1368+
1369+
def multi_gpu_only(f=None, *, num_gpus: int = 2):
    """Skip this test when running on fewer than ``num_gpus`` GPUs.

    Consistent with ``single_gpu_only``: usable both bare
    (``@multi_gpu_only``) and called (``@multi_gpu_only(num_gpus=4)``).
    Existing ``@multi_gpu_only()`` / ``@multi_gpu_only(num_gpus=...)``
    call sites are unaffected.

    Args:
        f: the test function when used as a bare decorator; ``None`` when
            the decorator is parameterized.
        num_gpus: minimum number of GPUs the test requires.
    """
    marks = gpu_tier_mark(min_gpus=num_gpus)

    def wrapper(func):
        # Apply in reverse so the marks stack in declaration order.
        for mark in reversed(marks):
            func = mark(func)
        return func

    return wrapper(f) if f is not None else wrapper
1379+
1380+
13301381
async def completions_with_server_args(
13311382
prompts: list[str],
13321383
model_name: str,

tests/v1/e2e/test_async_scheduling.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pytest
77
import torch._dynamo.config as dynamo_config
88

9+
from tests.utils import large_gpu_mark, single_gpu_only
910
from vllm import SamplingParams
1011
from vllm.logprobs import Logprob
1112
from vllm.platforms import current_platform
@@ -36,6 +37,7 @@
3637
)
3738

3839

40+
@single_gpu_only
3941
def test_without_spec_decoding(
4042
sample_json_schema,
4143
monkeypatch: pytest.MonkeyPatch,
@@ -95,6 +97,8 @@ def test_without_spec_decoding(
9597
run_tests(monkeypatch, MODEL, test_configs, test_sampling_params)
9698

9799

100+
@single_gpu_only
101+
@large_gpu_mark(min_gb=16)
98102
def test_with_spec_decoding(sample_json_schema, monkeypatch: pytest.MonkeyPatch):
99103
"""Test consistency and acceptance rates with some different combos of
100104
preemption, executor, async scheduling, prefill chunking,

0 commit comments

Comments
 (0)