1 parent c4c32ba commit 8415eb1
modelopt/torch/quantization/tensor_quant.py
@@ -80,7 +80,7 @@ def scaled_e4m3_impl(
     Returns:
         Input tensors fake-quantized to FP8.
     """
-    if inputs.is_cpu or amax is None or amax.squeeze().ndim > 1:
+    if (not inputs.is_cuda) or amax is None or amax.squeeze().ndim > 1:
         return fp8_eager(inputs, amax)
 
     cuda_ext_fp8 = get_cuda_ext_fp8(raise_if_failed=False)
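For context on why the predicate changed: `inputs.is_cpu` only routes CPU tensors to the eager fallback, so a tensor on a device that is neither CPU nor CUDA (e.g. `meta` or `mps`) would fall through to the CUDA extension path. `not inputs.is_cuda` sends every non-CUDA tensor to the fallback instead. A minimal sketch of the difference; the `needs_eager_fallback_*` helpers are illustrative only and not part of modelopt:

```python
import torch


def needs_eager_fallback_old(inputs: torch.Tensor, amax) -> bool:
    # Old check: only CPU tensors fall back to the eager path.
    # A tensor on a non-CPU, non-CUDA device has is_cpu == False,
    # so it would incorrectly reach the CUDA extension.
    return inputs.is_cpu or amax is None or amax.squeeze().ndim > 1


def needs_eager_fallback_new(inputs: torch.Tensor, amax) -> bool:
    # New check: any tensor not on a CUDA device falls back.
    return (not inputs.is_cuda) or amax is None or amax.squeeze().ndim > 1


if __name__ == "__main__":
    amax = torch.tensor(1.0)
    # A "meta" tensor is neither CPU nor CUDA.
    x_meta = torch.empty(4, device="meta")
    print(needs_eager_fallback_old(x_meta, amax))  # False: would hit the CUDA ext
    print(needs_eager_fallback_new(x_meta, amax))  # True: safely falls back
```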