
Commit 6323dff

Add comment
1 parent d0dd9d0 commit 6323dff

File tree

1 file changed: +6 -0 lines changed

auto_fp8/quantize.py

Lines changed: 6 additions & 0 deletions
@@ -73,18 +73,24 @@ def fp8_gemm(A, A_scale, B, B_scale, bias, out_dtype):
         return torch.empty(size=(0, B.shape[0]), dtype=out_dtype, device=A.device)

 <<<<<<< HEAD
+<<<<<<< HEAD
+=======
+>>>>>>> 959bdbc (Add comment)
     # TODO: Disable native fp8 gemm for now, always just dequantize
     # native_fp8_support = (
     #     torch.cuda.is_available() and torch.cuda.get_device_capability() >= (8, 9)
     # )
     native_fp8_support = False
+<<<<<<< HEAD
 =======
     native_fp8_support = (
         torch.cuda.is_available()
         and torch.cuda.get_device_capability() >= (8, 9)
         and False
     )
 >>>>>>> 3ee9283 (Support calibrating kv cache scales)
+=======
+>>>>>>> 959bdbc (Add comment)
     if native_fp8_support:
         need_reshape = A.dim() == 3
         if need_reshape:
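
The whole conflict turns on one question: should fp8_gemm use native FP8 GEMM when the GPU supports it, or always dequantize? Both sides of the hunk end up disabling it, the HEAD side with a hard native_fp8_support = False behind a TODO, the other side by appending "and False" to the capability check. Below is a minimal sketch of that capability check, assuming a HEAD-style resolution (force the dequantize fallback, keep the check around for later); the helper name native_fp8_gemm_supported is hypothetical and does not exist in auto_fp8/quantize.py.

    import torch

    def native_fp8_gemm_supported() -> bool:
        # Hypothetical helper, not part of quantize.py: native FP8 tensor-core GEMM
        # needs a CUDA device with compute capability (8, 9) or newer (Ada Lovelace / Hopper).
        # is_available() short-circuits the check on CPU-only machines.
        return torch.cuda.is_available() and torch.cuda.get_device_capability() >= (8, 9)

    # Resolved in the spirit of the HEAD side of the hunk above:
    # TODO: Disable native fp8 gemm for now, always just dequantize
    # native_fp8_support = native_fp8_gemm_supported()
    native_fp8_support = False

Until one side is actually chosen, the committed file still contains the <<<<<<< / ======= / >>>>>>> markers, which are syntax errors in Python, so quantize.py cannot be imported as of this commit.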
