pytorch
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
Lines changed: 3 additions & 0 deletions
diff --git a/backends/arm/_passes/fuse_constant_ops_pass.py b/backends/arm/_passes/fuse_constant_ops_pass.py
Lines changed: 1 addition & 0 deletions
diff --git a/backends/arm/_passes/rewrite_matmul.py b/backends/arm/_passes/rewrite_matmul.py
Lines changed: 97 additions & 0 deletions
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
Lines changed: 5 additions & 5 deletions
diff --git a/backends/arm/operators/op_bmm.py b/backends/arm/operators/op_bmm.py
Lines changed: 0 additions & 143 deletions
@@ -91,6 +91,7 @@
     ReplaceScalarWithTensorArgPassTOSABI,
     ReplaceScalarWithTensorArgPassTOSAMI,
 )
+from .rewrite_matmul import RewriteMatmulPass  # noqa
 from .rewrite_upsample import RewriteUpsamplePass  # noqa
 from .scalars_to_attribute_pass import ScalarsToAttributePass  # noqa
 from .size_adjust_input_pass import SizeAdjustInputPass  # noqa
 
@@ -92,6 +92,7 @@
     ReplaceScalarWithTensorArgPassTOSABI,
     ReplaceScalarWithTensorArgPassTOSAMI,
     RetraceFoldedDtypesPass,
+    RewriteMatmulPass,
     RewriteUpsamplePass,
     ScalarsToAttributePass,
     SizeAdjustInputPass,
@@ -211,6 +212,7 @@ def _tosa_INT_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(RewriteUpsamplePass(exported_program))
         self.add_pass(AddBiasPass(exported_program))
 
+        self.add_pass(RewriteMatmulPass(exported_program))
         self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(ToTosaMemoryFormatPass(exported_program))
         self.add_pass(RemoveNoopPass())
@@ -297,6 +299,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(RewriteUpsamplePass(exported_program))
         self.add_pass(AddBiasPass(exported_program))
         self.add_pass(InsertTableOpsPass(exported_program))
+        self.add_pass(RewriteMatmulPass(exported_program))
         self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(ToTosaMemoryFormatPass(exported_program))
         self.add_pass(RemoveNoopPass())
 
@@ -114,6 +114,7 @@ def call(self, graph_module):
             if node.op != "call_function":
                 continue
             if node.target in [
+                exir_ops.backend.tosa.MATMUL.default,
                 exir_ops.backend.tosa.RESCALE.default,
                 exir_ops.backend.tosa.RESIZE.default,
                 exir_ops.backend.tosa.TABLE.default,
 
@@ -0,0 +1,97 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Set, Type
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes.arm_pass_utils import (
+    create_node,
+    get_first_fake_tensor,
+)
+from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
+    get_input_qparams,
+    get_output_qparams,
+)
+from executorch.backends.arm.tosa.mapping import TosaSpecialDtype
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+
+
+class RewriteMatmulPass(ArmPass):
+    """Rewrites aten.bmm to tosa.MATMUL and inserts a tosa.RESCALE op if needed."""
+
+    _passes_required_after: Set[Type[ExportPass]] = set()
+
+    def _insert_output_rescale(self, graph_module, node, tosa_matmul_node, dtype):  # node: the original bmm node; dtype: dtype handed to RESCALE
+        input_qparams = get_input_qparams(node)
+        output_qparams = get_output_qparams(node)[0]  # output qparams are looked up under index 0
+        scale = (  # (x1 scale * x2 scale) / output scale
+            input_qparams[0].get_scale_per_tensor()
+            * input_qparams[1].get_scale_per_tensor()
+        ) / output_qparams.get_scale_per_tensor()
+
+        with graph_module.graph.inserting_after(tosa_matmul_node):
+            # call() only gets here when MATMUL yields int32 but bmm produced int8/int16; RESCALE is given that original `dtype`.
+            rescale_node = create_node(
+                graph_module.graph,
+                op_target=exir_ops.backend.tosa.RESCALE.default,
+                from_node=tosa_matmul_node,
+            )
+            tosa_matmul_node.replace_all_uses_with(rescale_node)  # rewire MATMUL's users first ...
+            rescale_node.args = (  # ... and only then feed MATMUL into RESCALE, so the rewiring above cannot touch this input
+                tosa_matmul_node,
+                dtype,
+                scale,
+                0,  # presumably the input zero point -- TODO confirm against the RESCALE op signature
+                output_qparams.get_zp_per_tensor(),
+            )
+
+    def call(self, graph_module):  # returns PassResult(graph_module, modified)
+        modified = False
+        for node in graph_module.graph.nodes:
+            if (  # only edge-dialect aten.bmm call_function nodes are rewritten
+                node.op != "call_function"
+                or node.target != exir_ops.edge.aten.bmm.default
+            ):
+                continue
+            modified = True
+
+            x1, x2 = node.args
+            tosa_matmul_target = exir_ops.backend.tosa.MATMUL.default
+            with graph_module.graph.inserting_before(node):
+                tosa_matmul_node = create_node(
+                    graph_module.graph,
+                    op_target=tosa_matmul_target,
+                    args=(x1, x2),
+                    kwargs={},
+                    from_node=node,
+                )
+                node.replace_all_uses_with(tosa_matmul_node)
+                graph_module.graph.erase_node(node)  # NOTE(review): `node` is still read below (fake tensor, qparams) after being erased
+
+            x1_fake_tensor = get_first_fake_tensor(x1)
+            x2_fake_tensor = get_first_fake_tensor(x2)
+            output_fake_tensor = tosa_matmul_target(x1_fake_tensor, x2_fake_tensor)  # run the op on fake tensors to learn the dtype MATMUL produces
+            node_output_fake_tensor = get_first_fake_tensor(node)  # presumably the output recorded for the original bmm -- confirm
+            if (  # int32 MATMUL result vs. int8/int16 bmm output: an output RESCALE is required
+                output_fake_tensor.dtype == torch.int32
+                and node_output_fake_tensor.dtype in (torch.int8, torch.int16)
+            ):
+                self._insert_output_rescale(
+                    graph_module,
+                    node,
+                    tosa_matmul_node,
+                    dtype=node_output_fake_tensor.dtype,
+                )
+                if x1_fake_tensor.dtype == torch.int16:  # int16 inputs: tag the MATMUL node with the INT48 special dtype
+                    tosa_matmul_node.meta[TosaSpecialDtype.meta_key()] = (
+                        TosaSpecialDtype.INT48
+                    )
+
+        if modified:
+            graph_module.recompile()
+            graph_module = super().call(graph_module).graph_module  # re-run the base-class call() on the rewritten graph
+        return PassResult(graph_module, modified)
@@ -14,7 +14,6 @@
     op_any,
     op_avg_pool2d,
     op_bitwise_not,
-    op_bmm,
     op_cat,
     op_ceil,
     op_clamp,
@@ -42,19 +41,20 @@
     op_pow,
     op_reciprocal,
     op_repeat,
-    op_rescale,
-    op_resize,
     op_rshift_tensor,
     op_rsqrt,
     op_sigmoid,
     op_sin,
     op_slice,
     op_sub,
     op_sum,
-    op_table,
     op_tanh,
     op_to_dim_order_copy,
-    op_transpose,
+    op_tosa_matmul,
+    op_tosa_rescale,
+    op_tosa_resize,
+    op_tosa_table,
+    op_tosa_transpose,
     op_view,
     op_where,
     ops_binary,
Original file line number	Diff line number	Diff line change
`@@ -91,6 +91,7 @@`
`91`	`91`	`ReplaceScalarWithTensorArgPassTOSABI,`
`92`	`92`	`ReplaceScalarWithTensorArgPassTOSAMI,`
`93`	`93`	`)`
	`94`	`+from .rewrite_matmul import RewriteMatmulPass # noqa`
`94`	`95`	`from .rewrite_upsample import RewriteUpsamplePass # noqa`
`95`	`96`	`from .scalars_to_attribute_pass import ScalarsToAttributePass # noqa`
`96`	`97`	`from .size_adjust_input_pass import SizeAdjustInputPass # noqa`