
Commit 03529fc

Inject identity nodes in sanitizer; revert existing logic; update test
Signed-off-by: Ali Boubezari <[email protected]>
1 parent 9363b09 commit 03529fc

3 files changed: +39, -36 lines

modelopt/onnx/autocast/graphsanitizer.py

Lines changed: 28 additions & 0 deletions
@@ -65,6 +65,7 @@ def sanitize(self) -> None:
         self.replace_custom_domain_nodes()
         self.cleanup_model()
         self.set_ir_version(self.max_ir_version)
+        self.sanitize_io_casts()
 
     def find_custom_nodes(self) -> None:
         """Find custom nodes in the model.
@@ -322,6 +323,33 @@ def _match_layernorm_pattern(self, mean_node: onnx.NodeProto) -> dict | None:
             logger.debug(f"Failed to match LayerNorm pattern at {mean_node.name}: {e!s}")
             return None
 
+    def sanitize_io_casts(self) -> None:
+        """Handle the special case where an input is casted directly to an output.
+
+        Inject an identity node after the cast node.
+        """
+        model_input_names = {input.name for input in self.model.graph.input}
+        model_output_names = {output.name for output in self.model.graph.output}
+        nodes_to_add = []
+        for node in self.model.graph.node:
+            if node.op_type == "Cast":
+                if node.input[0] in model_input_names and node.output[0] in model_output_names:
+                    cast_input_name = node.input[0]
+                    cast_output_name = node.output[0]
+                    cast_new_output_name = cast_input_name + "_io_cast_identity"
+                    nodes_to_add.append(
+                        helper.make_node(
+                            "Identity",
+                            inputs=[cast_new_output_name],
+                            outputs=[cast_output_name],
+                            name=node.name + "_io_cast_identity",
+                        )
+                    )
+                    node.output[0] = cast_new_output_name
+
+        for node in nodes_to_add:
+            self.model.graph.node.append(node)
+
     def _create_layernorm_node(self, pattern: dict) -> onnx.NodeProto:
         """Create a LayerNormalization node with optional bias."""
         ln_name = f"LayerNorm_{pattern['mean_node'].name}"
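For orientation, here is a minimal sketch of what the new pass does to a graph whose input feeds a Cast that writes directly to a graph output. The toy model and all tensor/node names below ("x", "y", "cast_x", "io_cast_example") are illustrative, and the GraphSanitizer(model, min_opset) usage mirrors the call added to the test further down; treat this as a sketch under those assumptions, not a verbatim recipe.

    from onnx import TensorProto, helper

    from modelopt.onnx.autocast.graphsanitizer import GraphSanitizer

    # A toy graph: fp32 input "x" is cast straight to fp16 output "y".
    x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 4])
    y = helper.make_tensor_value_info("y", TensorProto.FLOAT16, [1, 4])
    cast = helper.make_node("Cast", inputs=["x"], outputs=["y"], name="cast_x", to=TensorProto.FLOAT16)
    graph = helper.make_graph([cast], "io_cast_example", [x], [y])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])

    sanitizer = GraphSanitizer(model, 13)
    sanitizer.sanitize()
    model = sanitizer.model

    for node in model.graph.node:
        print(node.op_type, list(node.input), "->", list(node.output))
    # Expected shape of the result (other sanitizer passes may also run, but the
    # io-cast handling should leave the graph looking roughly like this):
    #   Cast     ['x'] -> ['x_io_cast_identity']
    #   Identity ['x_io_cast_identity'] -> ['y']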

modelopt/onnx/autocast/precisionconverter.py

Lines changed: 1 addition & 34 deletions
@@ -586,42 +586,9 @@ def _bypass_cast_node(self, node: onnx.NodeProto) -> None:
                     consumer.input[i] = input_tensor
 
     def _remove_preexisting_casts(self) -> None:
-        # First check for special case where an input is casted directly to an output
-        model_input_names = {input.name for input in self.model.graph.input}
-        model_output_names = {output.name for output in self.model.graph.output}
-        # Ensure that special casts that we add are not removed by the following logic
-        casts_to_skip = []
-        # Add casts as a separate step to avoid modifying the graph while iterating over it
-        casts_to_add = []
-        for node in self.model.graph.node:
-            if node.op_type == "Cast":
-                if node.input[0] in model_input_names and node.output[0] in model_output_names:
-                    # Create a special cast just for the input-output case.
-                    new_cast = helper.make_node(
-                        "Cast",
-                        name=node.name,
-                        inputs=[node.input[0]],
-                        outputs=[node.output[0]],
-                        to=utils.get_cast_to_type(node),
-                    )
-                    casts_to_skip.append(node.name)
-                    casts_to_add.append(new_cast)
-                    # Now adjust the old cast's name, consumers and producers
-                    node.name = f"{node.name}_io_special_case"
-                    node_new_output_name = f"{node.output[0]}_io_special_case"
-                    for consumer in utils.get_consumer_nodes(self.model, node.output[0]):
-                        for i, input_name in enumerate(consumer.input):
-                            if input_name == node.output[0]:
-                                consumer.input[i] = node_new_output_name
-                    node.output[0] = node_new_output_name
-
-        for cast in casts_to_add:
-            self.model.graph.node.append(cast)
-        casts_to_skip = set(casts_to_skip)
-
         nodes_to_remove = []
         for node in self.model.graph.node:
-            if node.op_type == "Cast" and node.name not in casts_to_skip:
+            if node.op_type == "Cast":
                 cast_from_type = self._get_tensor_type(node.input[0])
                 cast_to_type = utils.get_cast_to_type(node)
                 is_fp_cast = cast_to_type in [
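The special case can be dropped here because, once GraphSanitizer.sanitize() has run, a Cast fed by a graph input no longer writes directly to a graph output: the sanitizer renames the Cast's output and routes it through the injected Identity node. A rough before/after sketch (tensor names illustrative):

    # Before sanitization:  x (graph input) --Cast--> y (graph output)
    # After sanitization:   x (graph input) --Cast--> x_io_cast_identity --Identity--> y (graph output)

With that guarantee, _remove_preexisting_casts() can treat every Cast uniformly and the casts_to_skip bookkeeping is no longer needed.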

tests/unit/onnx/autocast/test_precisionconverter.py

Lines changed: 10 additions & 2 deletions
@@ -20,6 +20,7 @@
 
 import modelopt.onnx.autocast.utils as utils
 import modelopt.onnx.utils as onnx_utils
+from modelopt.onnx.autocast.graphsanitizer import GraphSanitizer
 from modelopt.onnx.autocast.logging_config import configure_logging
 from modelopt.onnx.autocast.precisionconverter import PrecisionConverter
 
@@ -1072,15 +1073,22 @@ def model_with_casted_input_to_output():
 
 
 @pytest.mark.parametrize("low_precision_type", ["fp16", "bf16"])
-def test_casted_input_to_output_model(model_with_casted_input_to_output, low_precision_type):
+@pytest.mark.parametrize("keep_io_types", [True, False])
+def test_casted_input_to_output_model(
+    model_with_casted_input_to_output, low_precision_type, keep_io_types
+):
     model, value_info_map, initializer_map, node_to_init_map = model_with_casted_input_to_output
 
+    min_opset = 22 if low_precision_type == "bf16" else 13
+    graph_sanitizer = GraphSanitizer(model, min_opset)
+    graph_sanitizer.sanitize()
+    model = graph_sanitizer.model
     converter = PrecisionConverter(
         model,
         value_info_map,
         initializer_map,
         node_to_init_map,
-        keep_io_types=True,
+        keep_io_types=keep_io_types,
         low_precision_type=low_precision_type,
     )
     converted_model = converter.convert(
