@@ -28,7 +28,6 @@
 from torch.distributed.fsdp._fully_shard._fsdp_param import FSDPParam
 from torch.distributed.tensor import Replicate
 
-from modelopt.torch.quantization.qtensor.base_qtensor import QFSDPParam, QTensorWrapper
 from modelopt.torch.utils import get_unwrapped_name, print_rank_0
 
 if TYPE_CHECKING:
@@ -479,6 +478,7 @@ def set_quantizer_state_dict(model: nn.Module, quantizer_state_dict: dict):
         module.load_state_dict(quantizer_state_dict[key])
 
 
+@contextmanager
 def patch_fsdp_mp_dtypes():
     """Patch FSDP2 to handle mixed dtypes properly during quantization."""
 
@@ -509,10 +509,15 @@ def _init_mp_dtypes(self) -> None:
     original_init_mp_dtypes = (
         torch.distributed.fsdp._fully_shard._fsdp_param_group.FSDPParamGroup._init_mp_dtypes
     )
-    torch.distributed.fsdp._fully_shard._fsdp_param_group.FSDPParamGroup._init_mp_dtypes = (
-        _init_mp_dtypes
-    )
-    return original_init_mp_dtypes
+    try:
+        torch.distributed.fsdp._fully_shard._fsdp_param_group.FSDPParamGroup._init_mp_dtypes = (
+            _init_mp_dtypes
+        )
+        yield
+    finally:
+        torch.distributed.fsdp._fully_shard._fsdp_param_group.FSDPParamGroup._init_mp_dtypes = (
+            original_init_mp_dtypes
+        )
 
 
 def get_prefixed_param_names(parent_model, target_module):
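This hunk turns `patch_fsdp_mp_dtypes` from a function that returned the original `_init_mp_dtypes` (leaving restoration to the caller) into a context manager whose `try`/`finally` reinstalls the original method automatically, even if the body raises. A minimal usage sketch of the new calling convention; the import path and the workload inside the block are assumptions, since the diff does not show the file name or call sites:

```python
# Assumed import path -- the diff does not name the module being edited.
from modelopt.torch.quantization.plugins.fsdp2 import patch_fsdp_mp_dtypes

with patch_fsdp_mp_dtypes():
    # FSDPParamGroup._init_mp_dtypes is monkeypatched only inside this
    # block, so FSDP2 tolerates the mixed parameter dtypes that
    # quantization introduces.
    do_quantized_weight_updates(model)  # illustrative placeholder

# On exit, the finally clause restores the original _init_mp_dtypes,
# even if the body raised.
```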
@@ -623,6 +628,8 @@ def fsdp2_aware_weight_update(root_model, modules_to_update):
         # Yields for necessary weight updates/processing
         yield
     finally:
+        from modelopt.torch.quantization.qtensor.base_qtensor import QFSDPParam, QTensorWrapper
+
         if isinstance(root_model, FSDPModule):
             # Update FSDPParam list
             for module in modules_to_update:
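The `QFSDPParam`/`QTensorWrapper` import removed from module scope in the first hunk reappears here inside the `finally` block. Deferring an import into a function body like this is typically done to break an import cycle: the imported module is resolved when `fsdp2_aware_weight_update` actually runs, not when this file loads. A generic sketch of the pattern, with illustrative module names that are not from this PR:

```python
# module_a.py
import module_b  # safe: module_b needs no names from module_a at load time

def f():
    return "from a"

# module_b.py
def g():
    # Deferred import: by the time g() is called, module_a has finished
    # loading, so the mutual dependency no longer cycles at import time.
    from module_a import f
    return f()
```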