# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult


class ConvertMinMaxPass(ExportPass):
    """
    Converts min/max to amin/amax and unrolls multi-dimensional reductions and the
    keep-dims arg to be TOSA compliant.

    The difference between max/min and amax/amin is (from the PyTorch docs):
        - amax/amin supports reducing on multiple dimensions,
        - amax/amin does not return indices,
        - amax/amin evenly distributes the gradient between equal values, while max(dim)/min(dim)
          propagates the gradient only to a single index in the source tensor.
    Since we do not care about gradients post-training, convert min/max ops to amin/amax
    as long as the indices are not used.

    Original:
        amax([dim1, dim2], keepdim = False)
    After pass:
        amax(dim1, keepdim = True)
        amax(dim2, keepdim = True)
        squeeze(dim = [dim1, dim2])
    """

    def check_argmax(self, node):
        """
        Raises a RuntimeError if the argmax/argmin indices returned by the min/max op
        are used in the graph.
        """
        if node.target in [torch.ops.aten.max.dim, torch.ops.aten.min.dim]:
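            # max.dim / min.dim return (values, indices). Conversion is safe if the
            # node has a single user (only the values getitem) or if the indices
            # getitem exists but is itself unused.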
            no_argmax = len(node.users) == 1
            no_argmax_users = (len(node.users) == 2) and (
                len(list(node.users)[1].users) == 0
            )
            if not (no_argmax or no_argmax_users):
                raise RuntimeError("Argmax is not supported by the arm_quantizer")

    def get_variables(self, node):
        """Returns variables specific to each op handled by the pass."""
        if node.target in [
            exir_ops.edge.aten.amax.default,
            exir_ops.edge.aten.amin.default,
        ]:
            replace_node = node
            op = node.target
            squeeze_op = exir_ops.edge.aten.squeeze_copy.dims
        elif node.target == exir_ops.edge.aten.max.dim:
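            # max.dim/min.dim return a (values, indices) tuple; the first user is the
            # getitem selecting the values, which is the node whose uses must be
            # rewired after conversion.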
            replace_node = list(node.users)[0]
            op = exir_ops.edge.aten.amax.default
            squeeze_op = exir_ops.edge.aten.squeeze_copy.dims
        elif node.target == exir_ops.edge.aten.min.dim:
            replace_node = list(node.users)[0]
            op = exir_ops.edge.aten.amin.default
            squeeze_op = exir_ops.edge.aten.squeeze_copy.dims
        elif node.target == torch.ops.aten.max.dim:
            replace_node = list(node.users)[0]
            op = torch.ops.aten.amax.default
            squeeze_op = torch.ops.aten.squeeze.dims
        elif node.target == torch.ops.aten.min.dim:
            replace_node = list(node.users)[0]
            op = torch.ops.aten.amin.default
            squeeze_op = torch.ops.aten.squeeze.dims
        else:
            raise RuntimeError(
                f"{node.name} is not an accepted target for ConvertMinMaxPass()"
            )

        return (replace_node, op, squeeze_op)

    def call(self, graph_module: torch.fx.GraphModule):
        modified = False
        for node in graph_module.graph.nodes:
            if node.op != "call_function":
                continue
            if node.target not in [
                exir_ops.edge.aten.amax.default,
                exir_ops.edge.aten.amin.default,
                exir_ops.edge.aten.max.dim,
                exir_ops.edge.aten.min.dim,
                torch.ops.aten.max.dim,
                torch.ops.aten.min.dim,
            ]:
                continue

            self.check_argmax(
                node
            )  # TODO: MLETORCH-718 : Quantization of indices in arm_quantizer
            replace_node, op, squeeze_op = self.get_variables(node)

            # Unwrap args
            if len(node.args) == 2:
                input_node, dims = node.args
                keepdims = False
            elif len(node.args) == 3:
                input_node, dims, keepdims = node.args
            else:
                raise RuntimeError(f"Unexpected arg size in {node.name}")

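            # Normalize dims to a list: wrap a single int, materialize any iterable.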
            try:
                iter(dims)
            except TypeError:
                dims = [dims]
            else:
                dims = list(dims)

            # Unroll multi-dimensional reduction and keep-dims arg
            with graph_module.graph.inserting_before(node):

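                # Chain one single-dim reduction per axis. keepdim is forced to True
                # so that the remaining dim indices stay valid for the following
                # reductions; the original keepdim is handled by the squeeze below.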
                for dim in dims:
                    args = (input_node, dim, True)
                    input_node = graph_module.graph.create_node(
                        "call_function", op, args, node.kwargs
                    )

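                # If the original op did not keep dims, squeeze away all reduced dims
                # in a single op.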
                if not keepdims:
                    input_node = graph_module.graph.create_node(
                        "call_function",
                        squeeze_op,
                        (input_node, dims),
                    )

            replace_node.replace_all_uses_with(input_node)
            modified = True

        if modified:
            graph_module.graph.eliminate_dead_code()
            graph_module.recompile()
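            # Retrace with ExportPass so that metadata is recomputed for the nodes
            # created above.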
            graph_module = super().call(graph_module).graph_module

        return PassResult(graph_module, modified)
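

# ---------------------------------------------------------------------------
# Minimal usage sketch (assumption, not part of the pass itself): since
# ConvertMinMaxPass derives from ExportPass, it can be called directly on an FX
# GraphModule, e.g. one obtained from torch.export. The model, input shape and
# __main__ guard below are illustrative only.
if __name__ == "__main__":

    class _MaxExample(torch.nn.Module):
        def forward(self, x):
            # Only the values are used, so check_argmax accepts the op.
            return torch.max(x, dim=1, keepdim=False).values

    _gm = torch.export.export(_MaxExample(), (torch.rand(2, 3, 4),)).module()
    _result = ConvertMinMaxPass()(_gm)
    # torch.ops.aten.max.dim is rewritten into amax(dim, keepdim=True) + squeeze.
    print(_result.graph_module.graph)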