Skip to content

Commit 88f141e

Browse files
Doug Lehr (dllehr-amd)
authored and committed
Attempt to put ck blockscale back in for mi300
1 parent f83d4df commit 88f141e

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

vllm/model_executor/layers/quantization/utils/fp8_utils.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,11 @@ def rocm_aiter_gemm_w8a8_blockscale_impl(
5454
block_size: list[int],
5555
output_dtype: torch.dtype = torch.float16,
5656
) -> torch.Tensor:
57-
# import aiter as rocm_aiter
58-
59-
# return rocm_aiter.gemm_a8w8_blockscale(A, B, As, Bs, dtype=output_dtype)
60-
from aiter.ops.triton.gemm_a8w8_blockscale import gemm_a8w8_blockscale
61-
57+
# MI300's fp8nuz should be enough to detect if we call ck vs triton
58+
if current_platform.is_fp8_fnuz():
59+
from aiter import gemm_a8w8_blockscale
60+
else:
61+
from aiter.ops.triton.gemm_a8w8_blockscale import gemm_a8w8_blockscale
6262
return gemm_a8w8_blockscale(A, B, As, Bs, dtype=output_dtype)
6363

6464

0 commit comments

Comments (0)