from modelopt.torch.quantization.utils import fsdp2_aware_weight_update, patch_fsdp_mp_dtypes


-@pytest.fixture(autouse=True)
-def patch_fsdp_dtypes():
-    """Automatically patch FSDP mixed precision dtypes for all tests in this module."""
-    with patch_fsdp_mp_dtypes():
-        yield
-
-
def _update_weight_test(rank, size):
4134 """Test fsdp2 weight update context for weight update -> only value changed"""
    from torch.distributed._composable.fsdp import fully_shard

-    # Define and shard model
-    model = ToyModel(dims=[4, 4], bias=False).to("cuda")
+    with patch_fsdp_mp_dtypes():
+        # Define and shard model
+        model = ToyModel(dims=[4, 4], bias=False).to("cuda")

-    assert not torch.equal(
-        model.linears.weight.data,
-        torch.zeros(4, 4).to(model.linears.weight.device).to(model.linears.weight.dtype),
-    )
+        assert not torch.equal(
+            model.linears.weight.data,
+            torch.zeros(4, 4).to(model.linears.weight.device).to(model.linears.weight.dtype),
+        )

-    fully_shard(model.linears)
-    fully_shard(model)
+        fully_shard(model.linears)
+        fully_shard(model)

-    torch.distributed.barrier()
+        torch.distributed.barrier()

-    for name, module in model.named_modules():
-        if "linears" in name:
-            with fsdp2_aware_weight_update(model, module):
-                module.weight.data = torch.zeros_like(module.weight.data)
+        for name, module in model.named_modules():
+            if "linears" in name:
+                with fsdp2_aware_weight_update(model, module):
+                    module.weight.data = torch.zeros_like(module.weight.data)

-    torch.distributed.barrier()
-    model.linears.unshard()
+        torch.distributed.barrier()
+        model.linears.unshard()

-    # Check if weights are as expected after unshard
-    for param in model.parameters():
-        assert torch.allclose(
-            torch.zeros(4, 4).to(param.data.device).to(param.data.dtype), param.data
-        )
+        # Check if weights are as expected after unshard
+        for param in model.parameters():
+            assert torch.allclose(
+                torch.zeros(4, 4).to(param.data.device).to(param.data.dtype), param.data
+            )

-    # Check if forward pass is as expected
-    model.linears.reshard()
-    output = model(torch.randn(4, 4).to(model.linears.weight.device))
-    assert torch.allclose(torch.zeros(4, 4).to(output.device).to(output.dtype), output)
+        # Check if forward pass is as expected
+        model.linears.reshard()
+        output = model(torch.randn(4, 4).to(model.linears.weight.device))
+        assert torch.allclose(torch.zeros(4, 4).to(output.device).to(output.dtype), output)


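Each `_*_test(rank, size)` worker assumes an already-initialized process group and one GPU per rank; the launcher itself sits outside this hunk. A minimal sketch of how such a worker could be driven, assuming 2 GPUs and the NCCL backend (the `_run` wrapper and the port choice are illustrative, not part of this test module):

import os

import torch
import torch.multiprocessing as mp

def _run(rank, size):
    # Hypothetical launcher: one process per GPU, NCCL backend.
    os.environ.setdefault("MASTER_ADDR", "localhost")
    os.environ.setdefault("MASTER_PORT", "29500")
    torch.distributed.init_process_group("nccl", rank=rank, world_size=size)
    torch.cuda.set_device(rank)
    try:
        _update_weight_test(rank, size)
    finally:
        torch.distributed.destroy_process_group()

if __name__ == "__main__":
    mp.spawn(_run, args=(2,), nprocs=2, join=True)
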
def _compress_weight_test(rank, size):
7872 """Test fsdp2 weight update context for weight compression -> only value,shape and dtype changed"""
    from torch.distributed._composable.fsdp import fully_shard

-    # Define and shard model
-    model = ToyModel(dims=[6, 6], bias=False).to("cuda")
+    with patch_fsdp_mp_dtypes():
+        # Define and shard model
+        model = ToyModel(dims=[6, 6], bias=False).to("cuda")

-    assert not torch.equal(
-        model.linears.weight.data,
-        torch.zeros(6, 6).to(model.linears.weight.device).to(model.linears.weight.dtype),
-    )
+        assert not torch.equal(
+            model.linears.weight.data,
+            torch.zeros(6, 6).to(model.linears.weight.device).to(model.linears.weight.dtype),
+        )

-    fully_shard(model.linears)
-    fully_shard(model)
-    torch.distributed.barrier()
+        fully_shard(model.linears)
+        fully_shard(model)
+        torch.distributed.barrier()

-    for name, module in model.named_modules():
-        if "linears" in name:
-            with fsdp2_aware_weight_update(model, module):
-                module.weight.data = (
-                    torch.zeros(2, 2).to(torch.float8_e4m3fn).to(module.weight.data.device)
-                )
+        for name, module in model.named_modules():
+            if "linears" in name:
+                with fsdp2_aware_weight_update(model, module):
+                    module.weight.data = (
+                        torch.zeros(2, 2).to(torch.float8_e4m3fn).to(module.weight.data.device)
+                    )

-    torch.distributed.barrier()
-    model.linears.unshard()
-    # Check if weights are as expected after unshard
-    for param in model.parameters():
-        assert param.data.dtype == torch.float8_e4m3fn
+        torch.distributed.barrier()
+        model.linears.unshard()
+        # Check if weights are as expected after unshard
+        for param in model.parameters():
+            assert param.data.dtype == torch.float8_e4m3fn


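Compression is the stricter exercise of `fsdp2_aware_weight_update`: the replacement tensor changes shape (6x6 to 2x2) and dtype (to `torch.float8_e4m3fn`), not just values, which is exactly what the docstring flags. A standalone sanity sketch of the dtype behavior the assertion above relies on, assuming a PyTorch build (>= 2.1) that ships `float8_e4m3fn`:

import torch

# float8_e4m3fn stores one byte per element; value checks go through a
# wider dtype because most float8 ops are not implemented directly.
w = torch.zeros(2, 2, dtype=torch.float8_e4m3fn)
assert w.element_size() == 1
assert torch.equal(w.to(torch.float32), torch.zeros(2, 2))
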
def _compare_parameters_and_buffers(model1, model2):
@@ -126,97 +121,99 @@ def _fuse_layers(rank, size, quant_config):

    from torch.distributed._composable.fsdp import fully_shard

-    # Initialize model
-    model = SmallQKVModel(dim=32).to("cuda")
-    non_fsdp_model = SmallQKVModel(dim=32).to("cuda")
-    non_fsdp_model.load_state_dict(copy.deepcopy(model.state_dict()))
-    model.eval()
-    non_fsdp_model.eval()
+    with patch_fsdp_mp_dtypes():
+        # Initialize model
+        model = SmallQKVModel(dim=32).to("cuda")
+        non_fsdp_model = SmallQKVModel(dim=32).to("cuda")
+        non_fsdp_model.load_state_dict(copy.deepcopy(model.state_dict()))
+        model.eval()
+        non_fsdp_model.eval()

-    _compare_parameters_and_buffers(model, non_fsdp_model)
+        _compare_parameters_and_buffers(model, non_fsdp_model)

-    # Create calibration data ONCE
-    calib_data = torch.randn(1, 32, device="cuda")
+        # Create calibration data ONCE
+        calib_data = torch.randn(1, 32, device="cuda")

-    def calib_fn(x):
-        return x(calib_data)
+        def calib_fn(x):
+            return x(calib_data)

-    # Shard model
-    fully_shard(model)
-    torch.distributed.barrier()
+        # Shard model
+        fully_shard(model)
+        torch.distributed.barrier()

-    # Quantize model
-    mtq.quantize(model, quant_config, calib_fn)
-    mtq.quantize(non_fsdp_model, quant_config, calib_fn)
+        # Quantize model
+        mtq.quantize(model, quant_config, calib_fn)
+        mtq.quantize(non_fsdp_model, quant_config, calib_fn)

-    torch.distributed.barrier()
+        torch.distributed.barrier()

-    model.apply_embed = True
-    non_fsdp_model.apply_embed = True
+        model.apply_embed = True
+        non_fsdp_model.apply_embed = True

-    requantize_resmooth_fused_llm_layers(model)
-    requantize_resmooth_fused_llm_layers(non_fsdp_model)
+        requantize_resmooth_fused_llm_layers(model)
+        requantize_resmooth_fused_llm_layers(non_fsdp_model)

-    torch.distributed.barrier()
+        torch.distributed.barrier()

-    # Unshard model
-    model.unshard()
+        # Unshard model
+        model.unshard()

-    _compare_parameters_and_buffers(model, non_fsdp_model)
+        _compare_parameters_and_buffers(model, non_fsdp_model)


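The body of `_compare_parameters_and_buffers` falls outside this hunk. For orientation, a hypothetical sketch of what such a name-aligned comparison could look like (not the file's actual implementation):

def _compare_parameters_and_buffers_sketch(model1, model2):
    # Compare by name so a failure reports which tensor diverged.
    params1, params2 = dict(model1.named_parameters()), dict(model2.named_parameters())
    assert params1.keys() == params2.keys()
    for name, p1 in params1.items():
        assert torch.allclose(p1.detach(), params2[name].detach()), name
    bufs1, bufs2 = dict(model1.named_buffers()), dict(model2.named_buffers())
    assert bufs1.keys() == bufs2.keys()
    for name, b1 in bufs1.items():
        assert torch.equal(b1, bufs2[name]), name
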
def _export_quantized_weight_test(rank, size, quant_config):
    import copy

    from torch.distributed._composable.fsdp import fully_shard

-    # Initialize model
-    model = SmallQKVModel(dim=32).to("cuda")
-    non_fsdp_model = SmallQKVModel(dim=32).to("cuda")
-    non_fsdp_model.load_state_dict(copy.deepcopy(model.state_dict()))
-    model.eval()
-    non_fsdp_model.eval()
-    _compare_parameters_and_buffers(model, non_fsdp_model)
+    with patch_fsdp_mp_dtypes():
+        # Initialize model
+        model = SmallQKVModel(dim=32).to("cuda")
+        non_fsdp_model = SmallQKVModel(dim=32).to("cuda")
+        non_fsdp_model.load_state_dict(copy.deepcopy(model.state_dict()))
+        model.eval()
+        non_fsdp_model.eval()
+        _compare_parameters_and_buffers(model, non_fsdp_model)

-    # Create calibration data ONCE
-    calib_data = torch.randn(1, 32, device="cuda")
+        # Create calibration data ONCE
+        calib_data = torch.randn(1, 32, device="cuda")

-    def calib_fn(x):
-        return x(calib_data)
+        def calib_fn(x):
+            return x(calib_data)

-    # Shard model
-    fully_shard(model)
-    torch.distributed.barrier()
+        # Shard model
+        fully_shard(model)
+        torch.distributed.barrier()

-    # Quantize model
-    mtq.quantize(model, quant_config, calib_fn)
-    mtq.quantize(non_fsdp_model, quant_config, calib_fn)
+        # Quantize model
+        mtq.quantize(model, quant_config, calib_fn)
+        mtq.quantize(non_fsdp_model, quant_config, calib_fn)

-    torch.distributed.barrier()
+        torch.distributed.barrier()

-    model.apply_embed = True
-    non_fsdp_model.apply_embed = True
+        model.apply_embed = True
+        non_fsdp_model.apply_embed = True

-    requantize_resmooth_fused_llm_layers(model)
-    requantize_resmooth_fused_llm_layers(non_fsdp_model)
+        requantize_resmooth_fused_llm_layers(model)
+        requantize_resmooth_fused_llm_layers(non_fsdp_model)

-    torch.distributed.barrier()
+        torch.distributed.barrier()

-    for name, sub_module in model.named_modules():
-        if is_quantlinear(sub_module):
-            with fsdp2_aware_weight_update(model, sub_module):
-                _export_quantized_weight(sub_module, torch.float16)
+        for name, sub_module in model.named_modules():
+            if is_quantlinear(sub_module):
+                with fsdp2_aware_weight_update(model, sub_module):
+                    _export_quantized_weight(sub_module, torch.float16)

-    for name, sub_module in non_fsdp_model.named_modules():
-        if is_quantlinear(sub_module):
-            with fsdp2_aware_weight_update(non_fsdp_model, sub_module):
-                _export_quantized_weight(sub_module, torch.float16)
+        for name, sub_module in non_fsdp_model.named_modules():
+            if is_quantlinear(sub_module):
+                with fsdp2_aware_weight_update(non_fsdp_model, sub_module):
+                    _export_quantized_weight(sub_module, torch.float16)

-    torch.distributed.barrier()
-    # Unshard model
-    model.unshard()
+        torch.distributed.barrier()
+        # Unshard model
+        model.unshard()

-    _compare_parameters_and_buffers(model, non_fsdp_model)
+        _compare_parameters_and_buffers(model, non_fsdp_model)


@pytest.mark.parametrize("device_count", [2])
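The parametrized test under this decorator is truncated in this view. A plausible shape for such an entry point, reusing the hypothetical `_run` launcher sketched earlier and skipping on machines with too few GPUs (illustrative only):

def test_fsdp2_weight_update(device_count):
    # Hypothetical body for the truncated parametrized test above.
    if torch.cuda.device_count() < device_count:
        pytest.skip(f"requires {device_count} CUDA devices")
    mp.spawn(_run, args=(device_count,), nprocs=device_count, join=True)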