@@ -271,15 +271,17 @@ def get_weight_scaling_factor(module: nn.Module, weight_name: str = "weight") ->
271271 QUANTIZATION_W4A8_NVFP4_FP8 ,
272272 ]:
273273 if quantization_format == QUANTIZATION_W4A8_NVFP4_FP8 :
274- # wsf2 for w4a8 needs to be amax/448, so that the wsf is in range 448/6.
274+ # weight_scaling_factor_2 for w4a8 needs to be amax/448, so that the weight scaling factor is in range 448/6.
275275 # This is because the kernel dequantizes weight to fp8, which is in range 448.
276- wsf2 = weight_quantizer ._amax .float () / 448.0
276+ weight_scaling_factor_2 = weight_quantizer ._amax .float () / 448.0
277277 else :
278- wsf2 = NVFP4QTensor .get_weights_scaling_factor_2_from_quantizer (weight_quantizer )
278+ weight_scaling_factor_2 = NVFP4QTensor .get_weights_scaling_factor_2_from_quantizer (
279+ weight_quantizer
280+ )
279281 return NVFP4QTensor .get_weights_scaling_factor (
280282 weight ,
281283 weight_quantizer .block_sizes [- 1 ],
282- wsf2 .to (weight .device ),
284+ weight_scaling_factor_2 .to (weight .device ),
283285 )[0 ]
284286
285287 if quantization_format in [QUANTIZATION_W4A8_MXFP4_FP8 , QUANTIZATION_MXFP4 ]:
@@ -302,7 +304,7 @@ def get_weight_scaling_factor_2(module: nn.Module, weight_name: str = "weight")
302304 ]:
303305 return NVFP4QTensor .get_weights_scaling_factor_2_from_quantizer (weight_quantizer )
304306 elif get_quantization_format (module ) == QUANTIZATION_W4A8_NVFP4_FP8 :
305- # wsf2 for w4a8 needs to be amax/448, so that the wsf is in range 448/6.
307+ # weight_scaling_factor_2 for w4a8 needs to be amax/448, so that the weight scaling factor is in range 448/6.
306308 # This is because the kernel dequantizes weight to fp8, which is in range 448.
307309 return weight_quantizer ._amax .float () / 448.0
308310
0 commit comments