1 parent c4c32ba commit 8415eb1
modelopt/torch/quantization/tensor_quant.py
@@ -80,7 +80,7 @@ def scaled_e4m3_impl(
     Returns:
         Input tensors fake-quantized to FP8.
     """
-    if inputs.is_cpu or amax is None or amax.squeeze().ndim > 1:
+    if (not inputs.is_cuda) or amax is None or amax.squeeze().ndim > 1:
         return fp8_eager(inputs, amax)
 
     cuda_ext_fp8 = get_cuda_ext_fp8(raise_if_failed=False)
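For context on why the predicate changed: `inputs.is_cpu` only routes CPU tensors to the eager fallback, so a tensor on a device that is neither CPU nor CUDA (e.g. `meta` or `mps`) would fall through to the CUDA extension path. `not inputs.is_cuda` sends every non-CUDA tensor to the fallback instead. A minimal sketch of the difference; the `needs_eager_fallback_*` helpers are illustrative only and not part of modelopt:

```python
import torch


def needs_eager_fallback_old(inputs: torch.Tensor, amax) -> bool:
    # Old check: only CPU tensors fall back to the eager path.
    # A tensor on a non-CPU, non-CUDA device has is_cpu == False,
    # so it would incorrectly reach the CUDA extension.
    return inputs.is_cpu or amax is None or amax.squeeze().ndim > 1


def needs_eager_fallback_new(inputs: torch.Tensor, amax) -> bool:
    # New check: any tensor not on a CUDA device falls back.
    return (not inputs.is_cuda) or amax is None or amax.squeeze().ndim > 1


if __name__ == "__main__":
    amax = torch.tensor(1.0)
    # A "meta" tensor is neither CPU nor CUDA.
    x_meta = torch.empty(4, device="meta")
    print(needs_eager_fallback_old(x_meta, amax))  # False: would hit the CUDA ext
    print(needs_eager_fallback_new(x_meta, amax))  # True: safely falls back
```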