Update quant_utils.py

cjluo-nv · web-flow · commit 76ce70a084c1 · 2025-09-05T13:12:30.000-07:00
Signed-off-by: Chenjie Luo <108829653+cjluo-nv@users.noreply.github.com>
diff --git a/modelopt/torch/export/quant_utils.py b/modelopt/torch/export/quant_utils.py
@@ -869,6 +869,16 @@ def postprocess_state_dict(state_dict: dict, maxbound: float, quantization: str
                 post_state_dict[prefix + new_suffix] = value
                 break
 
+    # Squeeze 3-D scale tensors whose leading dimension is 1 (drop that leading axis)
+    for key, value in post_state_dict.items():
+        if (
+            "scale" in key
+            and isinstance(value, torch.Tensor)
+            and value.dim() == 3
+            and value.shape[0] == 1
+        ):
+            post_state_dict[key] = value.squeeze(0)
+    
     # remove real quant parameters from the state dict
     keys_to_delete = []
     for key, value in post_state_dict.items():