Commit 619e98c

fixed bug with multiple outputs having multiple dim orders
1 parent 1af16cd commit 619e98c
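
For context, the bug this commit fixes shows up when a model returns a tuple whose elements end up with different dim orders after tagging. Below is a minimal sketch of such a model (hypothetical, written to mirror the ConvAddConvOutput test added further down):

import torch

# Hypothetical model mirroring the ConvAddConvOutput test below: after the
# channels-last tagging pass, the two returned tensors are not guaranteed to
# share a dim order, which is the situation the output node now handles per-output.
class MixedDimOrderOutputs(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(3, 16, 3)
        self.conv2 = torch.nn.Conv2d(16, 16, 3)

    def forward(self, x):
        y = self.conv1(x)
        z = torch.add(y, 1.0)
        return self.conv2(z), z  # two outputs that can carry different dim orders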

File tree

6 files changed: +66 −24 lines changed


backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py

Lines changed: 15 additions & 20 deletions
@@ -91,18 +91,10 @@ def is_nchw_node(self, node: torch.fx.Node) -> bool:
         return not self.is_nhwc_node(node)
 
     def requires_nhwc_input(self, node: torch.fx.Node) -> bool:
-        return (
-            node.target in self.memory_sensitive_ops_nhwc
-            or node.name == "output"
-            and not node.args[0][0].meta["val"].is_contiguous()
-        )
+        return node.target in self.memory_sensitive_ops_nhwc
 
     def requires_nchw_inputs(self, node: torch.fx.Node) -> bool:
-        return (
-            node.target in self.memory_sensitive_ops_nchw
-            or node.name == "output"
-            and node.args[0][0].meta["val"].is_contiguous()
-        )
+        return node.target in self.memory_sensitive_ops_nchw
 
     def can_be_converted_to_nhwc(self, node: torch.fx.Node) -> bool:
         # There are two conditions that must be met for a node to be able to
@@ -380,18 +372,21 @@ def call(self, graph_module: torch.fx.GraphModule):  # noqa: C901
                 # This node has no inputs so we don't need to change anything
                 continue
 
-            if self.requires_nhwc_input(node):
+            # Need a special case for the output node because it can have multiple output dim orders, as we can output a tuple of multiple nodes
+            if node.op == "output":
+                out_tuple = node.args[0]
+                for out_node in out_tuple:
+                    if out_node.meta["val"].is_contiguous():
+                        self.input_to_nchw(graph_module, out_node, node)
+                    else:
+                        self.input_to_nhwc(graph_module, out_node, node)
+            elif self.requires_nhwc_input(node):
                 # Nodes which enter this branch are ones that require their
                 # first input to be nhwc. This makes this node's output nhwc too
-                # Currently, all nodes like this should have all of their other
-                # inputs as nchw, so fail if this is not true
-                if node.name == "output":
-                    self.input_to_nhwc(graph_module, node.args[0][0], node)
-                else:
-                    self.input_to_nhwc(graph_module, node.args[0], node)
-
-                for input_node in node.all_input_nodes[1:]:
-                    if self.is_nhwc_node(input_node):
+
+                self.input_to_nhwc(graph_module, node.args[0], node)
+                for input_node in node.all_input_nodes:
+                    if input_node.op == "placeholder" and self.is_nhwc_node(input_node):
                         raise AssertionError(
                             f"Expected {input_node} to be NCHW in channels last reshape pass"
                         )
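
The per-output branch above keys off meta["val"].is_contiguous() to pick a conversion for each returned tensor. As a quick illustrative check in plain PyTorch (not part of the pass itself), a channels-last tensor reports False here while a default-layout tensor reports True:

import torch

# Illustrative check of the contiguity test used by the output branch above
# (plain PyTorch, not part of the pass): a channels-last 4D tensor is not
# contiguous in the default NCHW sense, so it takes the NHWC path.
t = torch.randn(1, 3, 8, 8)
print(t.is_contiguous())  # True -> converted via input_to_nchw
print(t.to(memory_format=torch.channels_last).is_contiguous())  # False -> converted via input_to_nhwc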

backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py

Lines changed: 47 additions & 0 deletions
@@ -335,3 +335,50 @@ def test_dq_conv2d_channels_last_tagged_reshape_pass(self) -> None:
             )
             .run_method_and_compare_outputs()
         )
+
+    class ConvAddConvOutput(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv1 = torch.nn.Conv2d(3, 16, 3)
+            self.conv2 = torch.nn.Conv2d(16, 16, 3)
+
+        def forward(self, x):
+            y = self.conv1(x)
+            z = torch.add(y, 1.0)
+            out1 = self.conv2(z)
+            out2 = z
+            return out1, out2
+
+    ConvAddConvOutputModule = ConvAddConvOutput()
+
+    def test_conv_add_conv_output(self):
+        x = torch.randn(1, 3, 8, 8)
+
+        self.run_tester(self.ConvAddConvOutput().eval(), (x,))
+
+        x_cl = x.to(memory_format=torch.channels_last)
+        self.run_tester(self.ConvAddConvOutput().eval(), (x_cl,))
+
+    class ThreeOutputsModel(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv1 = torch.nn.Conv2d(3, 3, 3)
+            self.conv2 = torch.nn.Conv2d(3, 3, 3)
+            self.linear = torch.nn.Linear(6, 6)
+
+        def forward(self, x):
+            conv1_out = self.conv1(x)
+            conv2_out = self.conv2(x)
+            linear_out = self.linear(x)
+
+            return linear_out, conv1_out, conv2_out
+
+    ThreeOutputsModelModule = ThreeOutputsModel()
+
+    def test_three_outputs_model(self):
+        x = torch.randn(1, 3, 6, 6)
+
+        self.run_tester(self.ThreeOutputsModelModule.eval(), (x,))
+
+        x_cl = x.to(memory_format=torch.channels_last)
+        self.run_tester(self.ThreeOutputsModelModule.eval(), (x_cl,))

extension/llm/tokenizers

Submodule eigen updated from 7294434 to a39ade4

third-party/ao

Submodule ao updated 100 files
