Commit 95a8b63

Author: pytorchbot
Committed: 2025-06-23 nightly release (4cb71a0)
Parent: 400d0df


63 files changed (+886, -367 lines)

README.md

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@ It supports a wide range of models including LLMs (Large Language Models), CV (C
 Platform Support:
 - Operating Systems:
   - iOS
-  - Mac
+  - MacOS (ARM64)
   - Android
   - Linux
   - Microcontrollers

backends/arm/CMakeLists.txt

Lines changed: 2 additions & 0 deletions

@@ -12,6 +12,8 @@ if(NOT EXECUTORCH_ROOT)
   set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
 endif()
 
+add_compile_options("-Wall" "-Werror")
+
 include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 
 set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)

backends/arm/_passes/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -5,9 +5,10 @@
 
 
 from . import arm_pass_utils  # noqa
+from .arm_pass import ArmPass  # noqa # usort: skip
+from .add_bias_pass import AddBiasPass  # noqa
 from .annotate_channels_last_dim_order_pass import AnnotateChannelsLastDimOrder  # noqa
 from .annotate_decomposed_matmul import AnnotateDecomposedMatmulPass  # noqa
-from .arm_pass import ArmPass  # noqa
 from .broadcast_args_pass import BroadcastArgsPass  # noqa
 from .cast_int64_pass import CastInt64BuffersToInt32Pass  # noqa
 from .cast_to_int32_pass import CastToInt32Pass  # noqa

backends/arm/_passes/add_bias_pass.py (new file)

Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
+from executorch.backends.transforms.utils import create_constant_placeholder
+
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import PassResult
+from torch.export.graph_signature import InputKind
+
+
+class AddBiasPass(ArmPass):
+    """TOSA requires convolution nodes to have a bias input.
+    This pass adds a bias input to convolution nodes that do not have one.
+    The bias is set to zero.
+    """
+
+    targeted_ops = (exir_ops.edge.aten.convolution.default,)
+
+    def call(self, graph_module):
+        modified = False
+        for node in graph_module.graph.nodes:
+            if node.op != "call_function":
+                continue
+            if node.target not in self.targeted_ops:
+                continue
+
+            if len(node.all_input_nodes) < 3:
+                modified = True
+                # bias is missing
+                weight_node = node.all_input_nodes[1]
+                output_channels = get_first_fake_tensor(weight_node).shape[0]
+                # add a node containing zeros
+                # if quantized, use int32, otherwise use float32
+                if (
+                    "output_qparams" in node.meta
+                    and len(node.meta["output_qparams"]) > 0
+                ):
+                    bias_data = torch.zeros(size=(output_channels,), dtype=torch.int32)
+                else:
+                    bias_data = torch.zeros(
+                        size=(output_channels,), dtype=torch.float32
+                    )
+
+                with graph_module.graph.inserting_after(weight_node):
+                    bias_node = create_constant_placeholder(
+                        self.exported_program,
+                        graph=graph_module.graph,
+                        kind=InputKind.PARAMETER,
+                        data=bias_data,
+                        persistent_buffer=True,
+                        name=f"{node.name}_bias",
+                    )
+                    node.update_arg(2, bias_node)
+
+        if modified:
+            graph_module = super().call(graph_module).graph_module
+        return PassResult(graph_module, modified)
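
For context, the case this new pass handles can be reproduced with a convolution module exported without a bias: the resulting convolution node carries only an input and a weight, so len(node.all_input_nodes) < 3 and AddBiasPass inserts a zero-filled parameter. The module, shapes, and print below are an illustrative sketch, not part of the commit:

import torch


class NoBiasConv(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # bias=False is the case AddBiasPass targets after export/lowering
        self.conv = torch.nn.Conv2d(3, 8, kernel_size=3, bias=False)

    def forward(self, x):
        return self.conv(x)


# The exported aten.convolution call carries None for its bias argument; once the
# Arm pass pipeline runs AddBiasPass, a zero-filled "<node>_bias" parameter is
# inserted as the third input instead.
ep = torch.export.export(NoBiasConv(), (torch.randn(1, 3, 16, 16),))
print(ep.graph)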

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 0 deletions

@@ -7,6 +7,7 @@
 
 # pyre-unsafe
 from executorch.backends.arm._passes import (
+    AddBiasPass,
     AnnotateChannelsLastDimOrder,
     AnnotateDecomposedMatmulPass,
     BroadcastArgsPass,
@@ -134,6 +135,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
 
         self.add_pass(FuseViewCopyTransform())
         self.add_pass(FuseConstantArgsPass(exported_program))
+        self.add_pass(AddBiasPass(exported_program))
 
         self.add_pass(InsertTableOpsPass(exported_program))
         self.add_pass(FuseEqualPlaceholdersPass(exported_program))
@@ -194,6 +196,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
 
         self.add_pass(FuseViewCopyTransform())
         self.add_pass(FuseConstantArgsPass(exported_program))
+        self.add_pass(AddBiasPass(exported_program))
         self.add_pass(InsertTableOpsPass(exported_program))
         self.add_pass(FuseEqualPlaceholdersPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())

backends/arm/_passes/match_where_self_arg_dtype_pass.py

Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@ def call(self, graph_module: torch.fx.GraphModule):
 
             input_dtype = input_.meta["val"].dtype
             other_dtype = other_.meta["val"].dtype
-            target_dtype = torch.float32
+            target_dtype = input_dtype
            if input_dtype != other_dtype:
                 target_dtype = get_largest_dtype(input_dtype, other_dtype)
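
The one-line change above means where.self no longer forces float32 when both value operands already share a dtype. A plain-torch illustration of the intended dtype behavior (not the pass's own helper):

import torch

cond = torch.tensor([True, False, True])
a = torch.tensor([1, 2, 3], dtype=torch.int32)
b = torch.tensor([4, 5, 6], dtype=torch.int32)

# Matching int32 operands keep int32; the old default of target_dtype = torch.float32
# would have rewritten the graph to cast both operands toward float32 first.
out = torch.where(cond, a, b)
assert out.dtype == torch.int32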

backends/arm/arm_backend.py

Lines changed: 34 additions & 4 deletions

@@ -10,12 +10,15 @@
 # backends. Converts via TOSA as an intermediate form supported by AoT and
 # JIT compiler flows.
 #
-
 from typing import List, Optional
 
-from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.tosa_specification import (  # type: ignore[import-not-found]
+    TosaSpecification,
+)
 
-from executorch.exir.backend.compile_spec_schema import CompileSpec
+from executorch.exir.backend.compile_spec_schema import (  # type: ignore[import-not-found]
+    CompileSpec,
+)
 
 
 class ArmCompileSpecBuilder:
@@ -28,6 +31,7 @@ def __init__(self):
 
     def vgf_compile_spec(
         self,
+        tosa_spec: TosaSpecification = None,  # type: ignore[assignment]
         compiler_flags: Optional[str] = "",
     ) -> "ArmCompileSpecBuilder":
         """
@@ -40,7 +44,33 @@
         self.compiler_flags = [
             compiler_flags,
         ]
-        self.tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+MI")
+
+        if tosa_spec is None:
+            tosa_spec = TosaSpecification.create_from_string("TOSA-1.0+FP")
+
+        tosa_version = tosa_spec.version  # type: ignore[attr-defined]
+        tosa_profiles = tosa_spec.profiles  # type: ignore[attr-defined]
+
+        if tosa_version.major != 1:
+            raise ValueError(
+                "Arm backend only supports converter-backend for TOSA version 1. "
+                f"Invalid TOSA version: {tosa_version}"
+            )
+
+        if "FP" not in tosa_profiles and "INT" not in tosa_profiles:
+            raise ValueError(
+                "Arm backend only supports converter-backend for FP or INT. "
+                f"Invalid TOSA profile: {tosa_profiles}"
+            )
+
+        if len(tosa_profiles) != 1:
+            raise ValueError(
+                "For now Arm backend only supports converter-backend for either FP or INT. "
+                f"Invalid TOSA profile: {tosa_profiles}"
+            )
+
+        self.tosa_spec = tosa_spec
+
         return self
 
     def ethosu_compile_spec(
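
A hedged usage sketch of the new parameter, assuming ArmCompileSpecBuilder.build() exists as elsewhere in this builder API and that "TOSA-1.0+INT" is an accepted spec string; only the defaulting and validation shown in this diff are relied on:

from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
from executorch.backends.arm.tosa_specification import TosaSpecification

# No argument: vgf_compile_spec now defaults to TOSA-1.0+FP.
fp_specs = ArmCompileSpecBuilder().vgf_compile_spec().build()

# Explicit single-profile TOSA 1.x spec; a 0.80 spec or a multi-profile spec
# would raise ValueError per the checks added above.
int_specs = (
    ArmCompileSpecBuilder()
    .vgf_compile_spec(TosaSpecification.create_from_string("TOSA-1.0+INT"))
    .build()
)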

backends/arm/operators/op_conv2d.py

Lines changed: 0 additions & 36 deletions

@@ -109,24 +109,6 @@ def define_node(
             local_bound=False,
         )
 
-        # Non-bias case.
-        if len(node.all_input_nodes) == 2:
-            # Create a zero bias tensor if not presented
-            out_channels = weight.shape[0]
-            bias_name = "bias" + node.name.split("default", 1)[1]
-            bias_type = output.dtype
-            if output.dtype == ts.DType.INT8:
-                # Conv is quantized to int8, but the TOSA operator has
-                # output type int32, and the bias must be the same type
-                # as the TOSA output type
-                bias_type = ts.DType.INT32
-            bias = tosa_graph.addConst(
-                [out_channels],
-                bias_type,
-                [0] * out_channels,
-                name=bias_name,
-            )
-
         # The output type is int32 when input type is int8.
         conv2d_output_name = output.name
         if output.dtype == ts.DType.INT8:
@@ -313,24 +295,6 @@ def define_node(
             name=f"{conv2d_output_name}_weight_zp",
         )
 
-        # Non-bias case.
-        if len(node.all_input_nodes) == 2:
-            # Create a zero bias tensor if not presented
-            out_channels = weight.shape[0]
-            bias_name = f"{conv2d_output_name}_bias"
-            bias_type = output.dtype
-            if output.dtype == ts.DType.INT8:
-                # Conv is quantized to int8, but the TOSA operator has
-                # output type int32, and the bias must be the same type
-                # as the TOSA output type
-                bias_type = ts.DType.INT32
-            bias = tosa_graph.addConst(
-                [out_channels],
-                bias_type,
-                [0] * out_channels,
-                name=bias_name,
-            )
-
         # Given input.shape is (N, Ci, H, W), and weight.shape is (Co, Ci/G, H, W)
         in_channels = input.shape[1]
         out_channels = weight.shape[0]

backends/arm/test/ops/test_where.py

Lines changed: 7 additions & 0 deletions

@@ -121,6 +121,12 @@ def scalar_condition(input: torch.Tensor):
     scalar_condition,
 )
 
+int32_scalar_cond = Where(
+    1,
+    torch.int32,
+    scalar_condition,
+)
+
 test_modules_common = {
     "two_dim_tensor_cond": lambda: two_dim_tensor_cond,
     "three_dim_tensor_cond": lambda: three_dim_tensor_cond,
@@ -134,6 +140,7 @@ def scalar_condition(input: torch.Tensor):
     **test_modules_common,
     "float32_tensor_cond_tuple_dtype": lambda: float32_tensor_cond_tuple_dtype,
     "float32_tensor_cond_tuple_dtype_bool": lambda: float32_tensor_cond_tuple_dtype_bool,
+    "int32_scalar_cond": lambda: int32_scalar_cond,
 }
 
 test_modules_BI = {

backends/cadence/aot/fuse_ops.py

Lines changed: 5 additions & 23 deletions

@@ -712,32 +712,14 @@ def _create_requantize_node(
     out_dtype: torch.dtype,
     graph: torch.fx.Graph,
 ) -> torch.fx.Node:
-    in_scale_tensor = graph.call_function(
-        exir_ops.edge.aten.full.default, args=((1,), in_scale)
-    )
-    in_zero_point_tensor = graph.call_function(
-        exir_ops.edge.aten.full.default,
-        args=((1,), in_zero_point),
-        kwargs={"dtype": torch.int32},
-    )
-    out_scale_tensor = graph.call_function(
-        exir_ops.edge.aten.full.default, args=((1,), out_scale)
-    )
-    out_zero_point_tensor = graph.call_function(
-        exir_ops.edge.aten.full.default,
-        args=((1,), out_zero_point),
-        kwargs={"dtype": torch.int32},
-    )
-    # cadence::requantize(Tensor input, Tensor in_scale, Tensor in_zero_point, Tensor out_scale, Tensor out_zero_point, ScalarType out_dtype) -> Tensor Y
-    # TODO(hardiksharma): Add support for per-tensor requantize.
     return graph.call_function(
-        exir_ops.edge.cadence.requantize.default,
+        exir_ops.edge.cadence.requantize.per_tensor,
        args=(
             in_tensor,
-            in_scale_tensor,
-            in_zero_point_tensor,
-            out_scale_tensor,
-            out_zero_point_tensor,
+            in_scale,
+            in_zero_point,
+            out_scale,
+            out_zero_point,
             out_dtype,
         ),
     )
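
The effect of this change is purely on the emitted graph shape: the per_tensor overload takes the scale/zero-point scalars directly, so the four aten.full nodes that previously wrapped them as 1-element tensors disappear. A self-contained FX sketch of the new single-node pattern, using a plain Python stand-in for the Cadence requantize kernel (the callable and its math are illustrative assumptions):

import torch
import torch.fx as fx


def requantize_per_tensor(x, in_scale, in_zp, out_scale, out_zp, out_dtype):
    # Stand-in for exir_ops.edge.cadence.requantize.per_tensor; approximate math only.
    return ((x.to(torch.float32) - in_zp) * in_scale / out_scale + out_zp).to(out_dtype)


graph = fx.Graph()
x = graph.placeholder("x")
# One call_function node with scalar args, instead of four aten.full nodes feeding
# the tensor-variant requantize overload.
out = graph.call_function(
    requantize_per_tensor, args=(x, 0.05, 0, 0.1, 128, torch.uint8)
)
graph.output(out)
print(graph)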
