We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 34bcee8 commit c80416a — Copy full SHA for c80416a
modelopt/torch/quantization/tensor_quant.py
@@ -61,6 +61,7 @@ def scaled_e4m3_impl(
61
None if inputs.device.index == torch.cuda.current_device() else inputs.device.index
62
):
63
if amax is None:
64
+ # This adds overhead; however this is not a common use case.
65
amax = torch.tensor(448.0, device=inputs.device, dtype=inputs.dtype)
66
if amax.numel() == 1:
67
outputs = cuda_ext_fp8.fake_e4m3fy(inputs, amax)
0 commit comments