Commit 4057a23

minor refactor
Signed-off-by: Suguna Velury <[email protected]>
1 parent 5beed13 commit 4057a23

File tree: 2 files changed (+9, -3 lines)

modelopt/torch/export/quant_utils.py

Lines changed: 4 additions & 0 deletions
@@ -727,6 +727,7 @@ def to_quantized_weight(
     quantization: str,
     weights_scaling_factor2: torch.Tensor | None = None,
     block_size: int | None = None,
+    dtype: torch.dtype | None = None,
 ):
     """Converts the weight to the quantized (packed) format."""
     if weights_scaling_factor is not None:
@@ -739,6 +740,9 @@ def to_quantized_weight(
     if isinstance(weight, QTensorWrapper):
         return weight.data
 
+    if dtype:
+        weight = weight.to(dtype)
+
     if quantization == QUANTIZATION_FP8:
         # Fix RuntimeError: Promotion for Float8 Types is not supported, attempted to promote Float8_e4m3fn and Float
         # in speculative decoding fp8 model export
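The change moves the optional dtype cast into `to_quantized_weight` itself: when a `dtype` is given, the weight is cast before any format-specific packing runs, and already-packed `QTensorWrapper` inputs still return early and are never re-cast. A minimal sketch of that pattern, assuming a simplified per-tensor FP8 path; `to_quantized_weight_sketch` and the scaling math are illustrative stand-ins, not ModelOpt's actual implementation:

```python
import torch

# Constant name taken from the diff; the string value is an assumption here.
QUANTIZATION_FP8 = "fp8"


def to_quantized_weight_sketch(
    weight: torch.Tensor,
    weights_scaling_factor: torch.Tensor,
    quantization: str,
    dtype: torch.dtype | None = None,
) -> torch.Tensor:
    """Illustrative packer: cast to the requested dtype first, then quantize."""
    # New behavior from this commit: the optional cast happens inside the
    # helper rather than at every call site.
    if dtype:
        weight = weight.to(dtype)
    if quantization == QUANTIZATION_FP8:
        # Simplified per-tensor FP8 conversion; the real helper also handles
        # block sizes, secondary scales, and other quantization formats.
        return (weight / weights_scaling_factor).to(torch.float8_e4m3fn)
    raise NotImplementedError(quantization)


w = torch.randn(4, 8, dtype=torch.bfloat16)
scale = w.abs().amax().float() / 448.0
packed = to_quantized_weight_sketch(w, scale, QUANTIZATION_FP8, dtype=torch.float16)
print(packed.dtype)  # torch.float8_e4m3fn
```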

modelopt/torch/export/unified_export_hf.py

Lines changed: 5 additions & 3 deletions
@@ -29,7 +29,7 @@
 
 from modelopt.torch.quantization import set_quantizer_by_cfg_context
 from modelopt.torch.quantization.nn import SequentialQuantizer, TensorQuantizer
-from modelopt.torch.quantization.qtensor import NVFP4QTensor, QTensorWrapper
+from modelopt.torch.quantization.qtensor import NVFP4QTensor
 from modelopt.torch.quantization.utils import quantizer_attr_names
 
 from .convert_hf_config import convert_hf_quant_config_format
@@ -314,23 +314,25 @@ def _export_quantized_weight(
         )[0]
 
         quantized_weight = to_quantized_weight(
-            weight.to(dtype) if not isinstance(weight, QTensorWrapper) else weight,
+            weight,
             weight_scale,
             quantization_format,
             weight_scale_2,
             block_size,
+            dtype,
         )
 
         quantized_weight, weight_scale = maybe_transpose_expert_weight_dimensions(
             quantized_weight, weight_scale, is_bmm_expert_weight=is_bmm_expert_weight
         )
     else:
         quantized_weight = to_quantized_weight(
-            weight.to(dtype) if not isinstance(weight, QTensorWrapper) else weight,
+            weight,
             weight_scale,
             quantization_format,
             weight_scale_2,
             block_size,
+            dtype,
         )
 
     setattr(sub_module, weight_name, nn.Parameter(quantized_weight, requires_grad=False))
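At the two call sites in `_export_quantized_weight`, the ternary `weight.to(dtype) if not isinstance(weight, QTensorWrapper) else weight` collapses to just `weight` plus a trailing `dtype` argument, and the `QTensorWrapper` import becomes unnecessary because the helper already short-circuits on wrapped weights. A hedged sketch of why the call site no longer needs the isinstance check; `FakeQTensorWrapper` and the helper body below are simplified stand-ins for the real classes:

```python
import torch


class FakeQTensorWrapper:
    """Stand-in for ModelOpt's QTensorWrapper: holds already-packed data."""

    def __init__(self, data: torch.Tensor):
        self.data = data


def to_quantized_weight_sketch(weight, dtype: torch.dtype | None = None):
    # Already-quantized weights pass through untouched (the early return in
    # the quant_utils.py hunk above).
    if isinstance(weight, FakeQTensorWrapper):
        return weight.data
    # Plain tensors are cast to the target dtype before packing (packing omitted).
    if dtype:
        weight = weight.to(dtype)
    return weight


# The export path can now hand either kind of weight to the helper uniformly,
# which is why the QTensorWrapper import and the isinstance checks at the
# call sites could be dropped.
plain = torch.randn(2, 2)
packed = FakeQTensorWrapper(torch.zeros(2, 2, dtype=torch.uint8))
assert to_quantized_weight_sketch(plain, torch.float16).dtype == torch.float16
assert to_quantized_weight_sketch(packed, torch.float16).dtype == torch.uint8
```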
