We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 34bcee8 commit c80416a — Copy full SHA for c80416a
modelopt/torch/quantization/tensor_quant.py
@@ -61,6 +61,7 @@ def scaled_e4m3_impl(
61
None if inputs.device.index == torch.cuda.current_device() else inputs.device.index
62
):
63
if amax is None:
64
+ # This adds overhead; however this is not a common use case.
65
amax = torch.tensor(448.0, device=inputs.device, dtype=inputs.dtype)
66
if amax.numel() == 1:
67
outputs = cuda_ext_fp8.fake_e4m3fy(inputs, amax)
0 commit comments