
Commit 6323dff

Add comment
1 parent d0dd9d0 commit 6323dff

File tree

1 file changed: +6 -0 lines changed

auto_fp8/quantize.py

Lines changed: 6 additions & 0 deletions
@@ -73,18 +73,24 @@ def fp8_gemm(A, A_scale, B, B_scale, bias, out_dtype):
         return torch.empty(size=(0, B.shape[0]), dtype=out_dtype, device=A.device)

 <<<<<<< HEAD
+<<<<<<< HEAD
+=======
+>>>>>>> 959bdbc (Add comment)
     # TODO: Disable native fp8 gemm for now, always just dequantize
     # native_fp8_support = (
     #     torch.cuda.is_available() and torch.cuda.get_device_capability() >= (8, 9)
     # )
     native_fp8_support = False
+<<<<<<< HEAD
 =======
     native_fp8_support = (
         torch.cuda.is_available()
         and torch.cuda.get_device_capability() >= (8, 9)
         and False
     )
 >>>>>>> 3ee9283 (Support calibrating kv cache scales)
+=======
+>>>>>>> 959bdbc (Add comment)
     if native_fp8_support:
         need_reshape = A.dim() == 3
         if need_reshape:
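
The whole conflict turns on one question: should fp8_gemm use native FP8 GEMM when the GPU supports it, or always dequantize? Both sides of the hunk end up disabling it, the HEAD side with a hard native_fp8_support = False behind a TODO, the other side by appending "and False" to the capability check. Below is a minimal sketch of that capability check, assuming a HEAD-style resolution (force the dequantize fallback, keep the check around for later); the helper name native_fp8_gemm_supported is hypothetical and does not exist in auto_fp8/quantize.py.

    import torch

    def native_fp8_gemm_supported() -> bool:
        # Hypothetical helper, not part of quantize.py: native FP8 tensor-core GEMM
        # needs a CUDA device with compute capability (8, 9) or newer (Ada Lovelace / Hopper).
        # is_available() short-circuits the check on CPU-only machines.
        return torch.cuda.is_available() and torch.cuda.get_device_capability() >= (8, 9)

    # Resolved in the spirit of the HEAD side of the hunk above:
    # TODO: Disable native fp8 gemm for now, always just dequantize
    # native_fp8_support = native_fp8_gemm_supported()
    native_fp8_support = False

Until one side is actually chosen, the committed file still contains the <<<<<<< / ======= / >>>>>>> markers, which are syntax errors in Python, so quantize.py cannot be imported as of this commit.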
