Skip to content

Commit 626d99d

Browse files
committed
Fix
Signed-off-by: Riyad Islam <[email protected]>
1 parent 8675622 commit 626d99d

File tree

2 files changed

+35
-15
lines changed

2 files changed

+35
-15
lines changed

modelopt/onnx/utils.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,8 @@ def onnx_type_str_to_enum(dtype: str) -> int:
755755
def remove_node_training_mode(onnx_model: onnx.ModelProto, node_op_type: str) -> onnx.ModelProto:
756756
"""Remove `training_mode` attribute and extra training outputs from nodes of a given op type.
757757
758+
This also removes the unused outputs from the training_mode nodes.
759+
758760
Args:
759761
onnx_model: The onnx model.
760762
node_op_type: The node type to remove training_mode attribute from.
@@ -763,33 +765,38 @@ def remove_node_training_mode(onnx_model: onnx.ModelProto, node_op_type: str) ->
763765
The onnx model with the training_mode attribute removed.
764766
"""
765767
removed_output_names = set()
768+
all_inputs = {inp for n in onnx_model.graph.node for inp in n.input}
769+
graph_outputs = {o.name for o in onnx_model.graph.output}
770+
keep = all_inputs | graph_outputs
766771

767772
for node in onnx_model.graph.node:
768773
if node.op_type != node_op_type:
769774
continue
770775

776+
is_training_mode = False
771777
# Drop the 'training_mode' attribute if present
772778
for idx, attr in enumerate(list(node.attribute)):
773779
if attr.name == "training_mode":
774780
del node.attribute[idx]
781+
if attr.i == 1:
782+
is_training_mode = True
775783
break
776784

777-
# If node has extra training outputs, keep only the first
778-
if len(node.output) > 1:
779-
removed_output_names.update(node.output[1:])
780-
node.output[:] = node.output[:1]
785+
# If the node was in training mode, remove all of its extra outputs, including the training-only outputs
786+
if is_training_mode:
787+
to_remove = []
788+
for name in node.output:
789+
if name not in keep:
790+
removed_output_names.add(name)
791+
to_remove.append(name)
792+
793+
for name in to_remove:
794+
node.output.remove(name)
781795

782796
if removed_output_names:
783797
# Clean up corresponding value_info entries
784798
keep = [vi for vi in onnx_model.graph.value_info if vi.name not in removed_output_names]
785799
del onnx_model.graph.value_info[:]
786800
onnx_model.graph.value_info.extend(keep)
787801

788-
# Also clean up graph.output entries
789-
keep_outputs = [
790-
out for out in onnx_model.graph.output if out.name not in removed_output_names
791-
]
792-
del onnx_model.graph.output[:]
793-
onnx_model.graph.output.extend(keep_outputs)
794-
795802
return onnx_model

tests/unit/torch/deploy/utils/test_torch_onnx_utils.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,20 @@ def _make_batchnorm_model(bn_node, extra_value_infos=None):
306306
_make_bn_initializer("var", [3], 1.0),
307307
]
308308

309+
graph_outputs = []
310+
for output_name, shape in [
311+
("output", [1, 3, 224, 224]),
312+
("running_mean", [3]),
313+
("running_var", [3]),
314+
]:
315+
if output_name in bn_node.output:
316+
graph_outputs.append(make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, shape))
317+
309318
graph_def = make_graph(
310319
[bn_node],
311320
"test_graph",
312321
[make_tensor_value_info("input", onnx.TensorProto.FLOAT, [1, 3, 224, 224])],
313-
[make_tensor_value_info("output", onnx.TensorProto.FLOAT, [1, 3, 224, 224])],
322+
graph_outputs,
314323
initializer=initializers,
315324
value_info=extra_value_infos or [],
316325
)
@@ -350,11 +359,12 @@ def test_remove_node_extra_training_outputs():
350359
"running_var",
351360
"saved_mean",
352361
"saved_inv_std",
353-
], # Extra training outputs
362+
],
354363
name="bn1",
355364
training_mode=1,
356365
)
357366

367+
# The extra training outputs are registered in the graph's value_info
358368
value_infos = [
359369
make_tensor_value_info("saved_mean", onnx.TensorProto.FLOAT, [3]),
360370
make_tensor_value_info("saved_inv_std", onnx.TensorProto.FLOAT, [3]),
@@ -363,10 +373,13 @@ def test_remove_node_extra_training_outputs():
363373
model = _make_batchnorm_model(bn_node, extra_value_infos=value_infos)
364374
result_model = remove_node_training_mode(model, "BatchNormalization")
365375

366-
# Verify only first output remains
376+
# Verify only the non-training outputs remain
367377
bn_node_result = result_model.graph.node[0]
368-
assert len(bn_node_result.output) == 1
378+
print(bn_node_result.output)
379+
assert len(bn_node_result.output) == 3
369380
assert bn_node_result.output[0] == "output"
381+
assert bn_node_result.output[2] == "running_var"
382+
assert bn_node_result.output[1] == "running_mean"
370383

371384
# Verify value_info entries for removed outputs are cleaned up
372385
value_info_names = [vi.name for vi in result_model.graph.value_info]

0 commit comments

Comments
 (0)