diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index 24220978534c..dc4b4546e451 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -14,10 +14,13 @@
 from ...utils import compare_two_settings, multi_gpu_test
 from ..utils import check_embeddings_close, check_logprobs_close
 
-pytestmark = pytest.mark.skipif(
-    current_platform.is_rocm(),
-    reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
-)
+if current_platform.is_rocm():
+    from vllm.platforms.rocm import on_gfx9
+
+    pytestmark = pytest.mark.skipif(
+        on_gfx9(),
+        reason="bitsandbytes not supported on gfx9 (warp size 64 limitation)",
+    )
 
 models_4bit_to_test = [
     ("facebook/opt-125m", "quantize opt model inflight"),
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 788f9d69c357..bb116792fed5 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -185,6 +185,9 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
+    # bitsandbytes not supported on gfx9 (warp size 64 limitation)
+    if not on_gfx9():
+        supported_quantization += ["bitsandbytes"]
 
     @classmethod
     def get_vit_attn_backend(