
Commit daad33d

keep cuda op
Signed-off-by: jiqing-feng <[email protected]>
1 parent 50ee994

File tree

1 file changed (+4, -2)

bitsandbytes/_ops.py

Lines changed: 4 additions & 2 deletions
@@ -225,7 +225,8 @@ def _(
 
     n = A.numel()
     blocks = -(n // -blocksize)
-    absmax = torch.empty((blocks,), device=A.device, dtype=A.dtype)
+    dtype = torch.float32 if torch.cuda.is_available() else A.dtype
+    absmax = torch.empty((blocks,), device=A.device, dtype=dtype)
     out = torch.empty(((n + 1) // (quant_storage.itemsize * 2), 1), device=A.device, dtype=quant_storage)
     return out, absmax
 
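Both hunks apply the same fix: the fake (shape-inference) kernel now allocates absmax in float32 when CUDA is available, presumably to match the dtype the real CUDA op writes (hence the commit message "keep cuda op"), while other backends keep the input dtype. Below is a minimal standalone sketch of this first, 4-bit function after the patch. The hunk header truncates the signature at "def _(", so the name and parameter list here are inferred from the body, not confirmed by the diff.

import torch

# Hypothetical standalone version of the patched 4-bit fake kernel; in the
# repo it is an anonymous `def _(...)` under a registration decorator.
def quantize_4bit_fake(
    A: torch.Tensor, blocksize: int, quant_storage: torch.dtype
) -> tuple[torch.Tensor, torch.Tensor]:
    n = A.numel()
    blocks = -(n // -blocksize)  # ceiling division: ceil(n / blocksize)
    # After this commit: float32 absmax on CUDA builds, input dtype elsewhere.
    dtype = torch.float32 if torch.cuda.is_available() else A.dtype
    absmax = torch.empty((blocks,), device=A.device, dtype=dtype)
    # Two 4-bit values are packed per storage element, hence itemsize * 2.
    out = torch.empty(
        ((n + 1) // (quant_storage.itemsize * 2), 1),
        device=A.device,
        dtype=quant_storage,
    )
    return out, absmax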

@@ -268,7 +269,8 @@ def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torch.Tensor
     torch._check_is_size(blocksize)
     n = A.numel()
     blocks = -(n // -blocksize)
-    absmax = torch.empty((blocks,), device=A.device, dtype=A.dtype)
+    dtype = torch.float32 if torch.cuda.is_available() else A.dtype
+    absmax = torch.empty((blocks,), device=A.device, dtype=dtype)
     out = torch.empty_like(A, dtype=torch.uint8)
     return out, absmax
 
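The second hunk makes the identical dtype change in the blockwise 8-bit fake kernel; only the output allocation differs (uint8, same shape as A). A sketch under the same assumptions (the standalone name is hypothetical; the code parameter is accepted but, as in the diff, unused for shape inference):

import torch

# Hypothetical standalone version of the second patched fake kernel.
def quantize_blockwise_fake(
    A: torch.Tensor, code: torch.Tensor, blocksize: int
) -> tuple[torch.Tensor, torch.Tensor]:
    torch._check_is_size(blocksize)  # validate blocksize as a size
    n = A.numel()
    blocks = -(n // -blocksize)  # ceiling division: ceil(n / blocksize)
    # Same fix as above: float32 absmax when CUDA is available.
    dtype = torch.float32 if torch.cuda.is_available() else A.dtype
    absmax = torch.empty((blocks,), device=A.device, dtype=dtype)
    out = torch.empty_like(A, dtype=torch.uint8)  # one byte per element
    return out, absmax

Note that the condition is torch.cuda.is_available() rather than a check on A.device, so a CUDA-enabled build reports float32 absmax even for CPU tensors.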
