Skip to content

Commit 0e3f0fc

Browse files
author
Doug Lehr
committed
Attempt to put ck blockscale back in for mi300
1 parent 7a7123f commit 0e3f0fc

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

vllm/model_executor/layers/quantization/utils/fp8_utils.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@ def rocm_aiter_gemm_w8a8_blockscale_impl(
5555
block_size: list[int],
5656
output_dtype: torch.dtype = torch.float16,
5757
) -> torch.Tensor:
58-
# import aiter as rocm_aiter
59-
60-
# return rocm_aiter.gemm_a8w8_blockscale(A, B, As, Bs, dtype=output_dtype)
61-
from aiter.ops.triton.gemm_a8w8_blockscale import gemm_a8w8_blockscale
62-
58+
# MI300 uses the FNUZ FP8 format, so is_fp8_fnuz() should be enough to decide whether we call the CK or the Triton kernel
59+
if current_platform.is_fp8_fnuz():
60+
from aiter import gemm_a8w8_blockscale
61+
else:
62+
from aiter.ops.triton.gemm_a8w8_blockscale import gemm_a8w8_blockscale
6363
return gemm_a8w8_blockscale(A, B, As, Bs, dtype=output_dtype)
6464

6565

0 commit comments

Comments
 (0)