Arm backend: Fix torch.matmul() failures for 2D tensor inputs (pytorch#14624)

YufengShi-dudu · oscarandersson8218 · web-flow · commit 9a7fb42d5ac9 · 2025-10-06T18:19:11.000+02:00
- ConvertMmToBmmPass converts an MM node to BMM nodes, turns input and output tensors from rank-2 to rank-3 via unsqueeze/squeeze, and inserts q-dq before and after BMM node when necessary. - After ConvertMmToBmmPass: ``` x -> q -> dq -> unsqueeze -> q_2 -> dq_2 -> \ bmm -> q_4 -> dq_4 / y -> q_1 -> dq_1 -> unsqueeze -> q_3 -> dq_3 -> ``` - Therefore, if the original matmul was 2D, the bmm already has DQ nodes on its inputs and Q node on its output. If AnnotateDecomposedMatmulPass (pytorch#10654) is still applied in this case, it produces illegal sequences such as: x -> q -> unsqueeze -> q_2 (invalid) - Fix by checking whether the BMM is already surrounded by DQ nodes on its inputs and Q nodes on its output. Change-Id: I9949d59b0b4a96fa34a88b0734014567ea6f24cc cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 Signed-off-by: Yufeng Shi <yufeng.shi@arm.com> Co-authored-by: Oscar Andersson <oscar.andersson@arm.com>
diff --git a/backends/arm/_passes/annotate_decomposed_matmul.py b/backends/arm/_passes/annotate_decomposed_matmul.py
@@ -73,7 +73,10 @@ def call(self, graph_module: GraphModule) -> PassResult:
                 node for node in partition.nodes if node.target in matmul_targets
             ][0]
 
-            if quantized_input:
+            if quantized_input and not all(
+                input_node.target in DQ_OPS
+                for input_node in matmul_node.all_input_nodes
+            ):
                 matmul_args = matmul_node.all_input_nodes
                 for node in matmul_args:
                     # Find the dq-node connected to this mm/bmm arg
@@ -99,7 +102,9 @@ def call(self, graph_module: GraphModule) -> PassResult:
 
             partition_output = list(partition.output_nodes[0].users)[0]
             quantized_output = partition_output.target in Q_OPS
-            if quantized_output:
+            if quantized_output and not all(
+                user.target in Q_OPS for user in matmul_node.users
+            ):
                 with graph_module.graph.inserting_after(matmul_node):
                     # Create q-node after matmul
                     q_node = create_node(
diff --git a/backends/arm/test/ops/test_matmul.py b/backends/arm/test/ops/test_matmul.py
@@ -22,6 +22,7 @@
 
 class MatMul(torch.nn.Module):
     test_data_generators = {
+        "rand_rand_2d": lambda: (torch.rand(5, 5), torch.rand(5, 2)),
         "rand_rand_3d": lambda: (torch.rand(2, 3, 5), torch.rand(2, 5, 2)),
         "rand_rand_4d": lambda: (torch.rand(1, 2, 3, 5), torch.rand(1, 2, 5, 2)),
     }
@@ -32,6 +33,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
 
 class MatMulSingleInput(torch.nn.Module):
     test_data_generators = {
+        "rand_2d": lambda: (torch.rand(5, 5),),
         "rand_3d": lambda: (torch.rand(2, 5, 5),),
         "rand_4d": lambda: (torch.rand(1, 2, 5, 5),),
     }
@@ -42,6 +44,11 @@ def forward(self, x: torch.Tensor):
 
 class MatMulCombo(torch.nn.Module):
     test_data_generators = {
+        "rand_rand_rand_2d": lambda: (
+            torch.rand(5, 5),
+            torch.rand(5, 2),
+            torch.rand(2, 5),
+        ),
         "rand_rand_rand_3d": lambda: (
             torch.rand(2, 5, 5),
             torch.rand(2, 5, 2),