Skip to content

Commit 959bdbc

Browse files
committed
Add comment
1 parent 3ee9283 commit 959bdbc

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

auto_fp8/quantize.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ def fp8_gemm(A, A_scale, B, B_scale, bias, out_dtype):
7272
# Deal with empty tensors (triggered by empty MoE experts)
7373
return torch.empty(size=(0, B.shape[0]), dtype=out_dtype, device=A.device)
7474

75-
native_fp8_support = (
76-
torch.cuda.is_available()
77-
and torch.cuda.get_device_capability() >= (8, 9)
78-
and False
79-
)
75+
# TODO: Disable native fp8 gemm for now, always just dequantize
76+
# native_fp8_support = (
77+
# torch.cuda.is_available() and torch.cuda.get_device_capability() >= (8, 9)
78+
# )
79+
native_fp8_support = False
8080
if native_fp8_support:
8181
need_reshape = A.dim() == 3
8282
if need_reshape:

0 commit comments

Comments
 (0)