Fix bypassing of 'Cast' connecting a consumer with multiple outputs and the model's output

gcunhase · gcunhase · commit 434baf6b0f0d · 2025-09-09T12:42:18.000-04:00
Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com>
diff --git a/modelopt/onnx/autocast/precisionconverter.py b/modelopt/onnx/autocast/precisionconverter.py
@@ -557,6 +557,12 @@ def convert_initializer(
                         to_type=self.high_precision_type,
                     )
 
+    def _replace_tensor_name(self, consumers, original_tensor_name, new_tensor_name):
+        for consumer in consumers:  # each consumer is modified in place through its `input` sequence
+            for idx, inp in enumerate(consumer.input):
+                if inp == original_tensor_name:  # exact name match; every matching slot is rewritten, not just the first
+                    consumer.input[idx] = new_tensor_name  # same input position, new tensor name; other inputs untouched
+
     def _bypass_cast_node(self, node: onnx.NodeProto) -> None:
         # handling only a single input and output, as we only remove cast nodes
         assert len(node.input) == 1
@@ -576,6 +582,9 @@ def _bypass_cast_node(self, node: onnx.NodeProto) -> None:
                     for i, prod_out in enumerate(producer.output):
                         if prod_out == input_tensor:
                             producer.output[i] = output_tensor
+                            consumers = utils.get_consumer_nodes(self.model, prod_out)
+                            if len(consumers) > 1:
+                                self._replace_tensor_name(consumers, prod_out, output_tensor)
         if (
             not is_output_producer
         ):  # Reconnect consumers of the cast output to use the cast input instead