Commit 8cf516e

[Bug-fix] Remove force_assign in sort_parameters to avoid re-sorting (#514)
## What does this PR do?

**Type of change:** Bug-fix

**Overview:**

- For FastNAS/GradNAS sorting, we previously `force_assign`-ed the parameters to their sorted values while still retaining `hp.active_slice`. The next time `mod.weight` was fetched, the sort was applied again to the already-sorted weight tensor, producing an incorrect ordering. This bug only affects FastNAS/GradNAS modules; Minitron already had a workaround that reset the order to None during export.

## Testing

- Tests pass
- Manually verified

Signed-off-by: Keval Morabia <[email protected]>
1 parent fcbdc31 commit 8cf516e
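
To make the failure mode in the overview concrete, here is a minimal, self-contained sketch in plain PyTorch (no ModelOpt classes involved): a permutation that sorts channels by importance gives the right result only when applied to the original tensor; applying it a second time to the already-sorted tensor scrambles the channels again.

```python
import torch

# Toy "weights" for 4 output channels, with per-channel importance = abs value.
weight = torch.tensor([0.1, 0.9, 0.3, 0.7])

# Order that sorts channels by descending importance (analogous to hp.active_slice).
order = torch.argsort(weight.abs(), descending=True)  # tensor([1, 3, 2, 0])

sorted_once = weight[order]        # tensor([0.9, 0.7, 0.3, 0.1])  -> correct
sorted_twice = sorted_once[order]  # tensor([0.7, 0.1, 0.3, 0.9])  -> scrambled

# This mirrors the bug: force_assign stored `sorted_once` back into the module,
# but hp.active_slice kept `order`, so the next access re-applied it.
```

This is exactly the interaction between the force-assigned weights and the retained `hp.active_slice` described in the overview.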

7 files changed: +12, -38 lines


CHANGELOG.rst

Lines changed: 8 additions & 1 deletion
@@ -1,7 +1,14 @@
 Model Optimizer Changelog (Linux)
 =================================
 
-0.39 (2025-11-07)
+0.40 (2025-12-xx)
+^^^^^^^^^^^^^^^^^
+
+**Bug Fixes**
+
+- Fix a bug in FastNAS pruning (computer vision models) where the model parameters were sorted twice messing up the ordering.
+
+0.39 (2025-11-14)
 ^^^^^^^^^^^^^^^^^
 
 **Deprecations**

modelopt/torch/nas/modules/conv.py

Lines changed: 2 additions & 2 deletions
@@ -137,7 +137,7 @@ def _estimate_importance(self) -> TracedHp.Importance:
         # for group > 1, we do not know how to handle it yet
         if self.groups > 1:
             return None
-        weight = self._parameters["weight"]  # retrieve full weight tensor
+        weight = self.weight
         c_in = weight.shape[1]
         return torch.linalg.vector_norm(
             torch.reshape(weight.detach().transpose(0, 1), (c_in, -1)), dim=1
@@ -249,6 +249,6 @@ def _estimate_importance(self) -> TracedHp.Importance:
         # for group > 1, we do not know how to handle it yet
         if self.groups > 1:
             return None
-        weight = self._parameters["weight"]  # retrieve full weight tensor
+        weight = self.weight
         c_in = weight.shape[0]
         return torch.linalg.vector_norm(torch.reshape(weight.detach(), (c_in, -1)), dim=1)
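
The importance metric in both hunks is just a per-channel L2 norm; the behavioral change is that the weight is now read through the dynamic `self.weight` (which applies `hp.active_slice` on access) instead of the raw `_parameters["weight"]`. A standalone sketch of the computation with a dummy tensor (shapes and names are illustrative only, not taken from the module):

```python
import torch

# Dummy 2D-conv-style weight: (out_channels, in_channels, kH, kW).
weight = torch.randn(8, 4, 3, 3)

# First hunk: importance per input channel.
# Move in_channels to dim 0, flatten the rest, and take the L2 norm per row.
c_in = weight.shape[1]
imp_in = torch.linalg.vector_norm(
    torch.reshape(weight.detach().transpose(0, 1), (c_in, -1)), dim=1
)
print(imp_in.shape)  # torch.Size([4]) -> one score per input channel

# Second hunk: same idea, but the channel of interest is dim 0 of the weight.
c0 = weight.shape[0]
imp0 = torch.linalg.vector_norm(torch.reshape(weight.detach(), (c0, -1)), dim=1)
print(imp0.shape)  # torch.Size([8])
```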

modelopt/torch/nas/modules/linear.py

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ def _get_bias(mod: "_DynamicLinear", bias: torch.Tensor | None) -> torch.Tensor
         return get_sliced_tensor(mod, bias, "out_features")
 
     def _estimate_importance(self) -> TracedHp.Importance:
-        return torch.linalg.vector_norm(self._parameters["weight"].detach(), dim=0)
+        return torch.linalg.vector_norm(self.weight.detach(), dim=0)
 
     def _setup(self):
         # register hyperparameters

modelopt/torch/nas/plugins/megatron.py

Lines changed: 0 additions & 7 deletions
@@ -51,7 +51,6 @@
 from modelopt.torch.opt.dynamic import DynamicModule
 from modelopt.torch.opt.hparam import HPType
 from modelopt.torch.opt.searcher import ConstraintsDict
-from modelopt.torch.opt.utils import named_hparams
 from modelopt.torch.trace import Symbol
 from modelopt.torch.utils import distributed as dist
 from modelopt.torch.utils import (
@@ -1322,12 +1321,6 @@ def _export_drop_layers(self) -> None:
 
     def export(self) -> torch.nn.Module:
         """Export the dynamic module to a torch.nn.Module."""
-        # TODO: Improve this!
-        # Slice order needs to be reset before exporting since weights are already
-        # force assigned and we dont want to sort them again (losing the correct order)
-        for n, hp in named_hparams(self, configurable=True):
-            hp.enforce_order(None)
-
         for handle in self.hook_handles:
             handle.remove()
         self._export_drop_layers()
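
The removed block was the Minitron-side workaround mentioned in the overview: it reset each hparam's order before export so that weights that had already been force-assigned would not be permuted a second time. With `force_assign` gone, the sort lives only in the slice applied at access time, so export can simply materialize the sliced weight once and no reset is needed. The sketch below illustrates that access-time pattern with a toy module; the class and attribute names are invented for illustration and are not ModelOpt's API.

```python
import torch
from torch import nn


class LazySortedLinear(nn.Module):
    """Toy module: keeps the original weight and applies a sort order on access."""

    def __init__(self, out_features: int, in_features: int):
        super().__init__()
        self.weight_raw = nn.Parameter(torch.randn(out_features, in_features))
        self.order = None  # LongTensor of row indices, analogous to hp.active_slice

    @property
    def weight(self) -> torch.Tensor:
        # The sort is applied exactly once, at read time, on the original storage.
        return self.weight_raw if self.order is None else self.weight_raw[self.order]

    def export(self) -> nn.Linear:
        out_features, in_features = self.weight.shape
        exported = nn.Linear(in_features, out_features, bias=False)
        with torch.no_grad():
            exported.weight.copy_(self.weight)  # materialize the sorted weight once
        return exported


m = LazySortedLinear(4, 8)
m.order = torch.argsort(
    torch.linalg.vector_norm(m.weight_raw.detach(), dim=1), descending=True
)
exported = m.export()  # rows come out in importance order; no "reset order" step needed
```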

modelopt/torch/nas/plugins/transformers.py

Lines changed: 1 addition & 1 deletion
@@ -123,7 +123,7 @@ def configure_qkv_out(self, q_name: str, k_name: str, v_name: str, out_name: str
 
         assert isinstance(out, nn.Linear)
         hp_hidden_dim.register_importance(
-            lambda: torch.linalg.vector_norm(out._parameters["weight"].detach(), dim=0)
+            lambda: torch.linalg.vector_norm(out.weight.detach(), dim=0)
         )
 
     def modify(

modelopt/torch/nas/search_space.py

Lines changed: 0 additions & 4 deletions
@@ -162,10 +162,6 @@ def sort_parameters(self, hps_to_sort: set[str] | None = None, verbose: bool = F
                 f"{'order' if hp._importance_is_order else 'importance'}={importance}"
             )
 
-        # now that we have enforced an order we can force reassign all parameters/buffers!
-        for _, mod in self.named_dynamic_modules():
-            mod.force_assign()
-
         # go back to old config
         self.select(config)

modelopt/torch/opt/dynamic.py

Lines changed: 0 additions & 22 deletions
@@ -586,28 +586,6 @@ def export(self) -> nn.Module:
 
         return self
 
-    @torch.no_grad()
-    def force_assign(self):
-        """Force re-assign all dynamic attributes to their current values.
-
-        .. warning::
-
-            Note that this method overwrites the actual buffers and parameters! Only use in
-            specific circumstances!!
-        """
-        # force-reassign all dynamic attributes
-        for name in self._get_dm_attribute_manager().da_keys():
-            val = getattr(self, name)
-            if isinstance(val, torch.Tensor):
-                val = val.detach().clone()
-            if name in self._parameters:
-                val = val if val is None else Parameter(val)
-                self.register_parameter(name, val)
-            elif name in self._buffers:
-                self.register_buffer(name, val)
-            else:
-                setattr(self, name, val)
-
     @classmethod
     @torch.no_grad()
     def convert(cls, module: nn.Module) -> "DynamicModule":