diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
index 9c2a074372f..0b4e27e5aaa 100644
--- a/backends/arm/_passes/arm_pass_manager.py
+++ b/backends/arm/_passes/arm_pass_manager.py
@@ -28,6 +28,7 @@
 )
 from executorch.backends.arm._passes.decompose_linear_pass import DecomposeLinearPass
 from executorch.backends.arm._passes.decompose_meandim_pass import DecomposeMeanDimPass
+from executorch.backends.arm._passes.decompose_select import DecomposeSelectPass
 from executorch.backends.arm._passes.decompose_softmaxes_pass import (
     DecomposeSoftmaxesPass,
 )
@@ -62,7 +63,6 @@
 )
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
-from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_manager import PassManager
 
@@ -72,9 +72,7 @@ class ArmPassManager(PassManager):
     def _transform(self, graph_module: torch.fx.GraphModule):
         return self(graph_module).graph_module
 
-    def transform_to_backend_pipeline(
-        self, exported_program: ExportedProgram, compile_spec: list[CompileSpec]
-    ):
+    def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
         """Apply passes before transforming program to backend"""
         self.add_pass(FuseQuantizedActivationPass())
         self.add_pass(DecomposeLinearPass())
@@ -137,11 +135,8 @@ def transform_to_backend_pipeline(
         self.add_pass(KeepDimsFalseToSqueezePass())
         self.add_pass(Conv1dUnsqueezePass(exported_program))
         self.add_pass(DecomposeSoftmaxesPass())
-        for spec in compile_spec:
-            if spec.key == "permute_memory_format":
-                memory_format = spec.value.decode()
-                if memory_format == "nhwc":
-                    self.add_pass(AnnotateChannelsLastDimOrder())
+        self.add_pass(DecomposeSelectPass())
+        self.add_pass(AnnotateChannelsLastDimOrder())
 
         return self._transform(exported_program.graph_module)
 
diff --git a/backends/arm/_passes/decompose_select.py b/backends/arm/_passes/decompose_select.py
new file mode 100644
index 00000000000..9ea836e6336
--- /dev/null
+++ b/backends/arm/_passes/decompose_select.py
@@ -0,0 +1,56 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import torch
+from executorch.backends.arm._passes.arm_pass_utils import create_node
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+
+
+class DecomposeSelectPass(ExportPass):
+    """
+    Decompose select into slice + squeeze so that Aten and TOSA outputs have
+    the same rank (input rank - 1). The slice keeps the selected element along
+    `dim`; the squeeze removes only that dim, preserving other size-1 dims.
+    """
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        select_ops = (
+            exir_ops.edge.aten.select.int,
+            exir_ops.edge.aten.select_copy.int,
+        )
+        slice_op = exir_ops.edge.aten.slice_copy.Tensor
+        squeeze_op = exir_ops.edge.aten.squeeze_copy.dims
+
+        for node in graph_module.graph.nodes:
+            if node.op != "call_function" or node.target not in select_ops:
+                continue
+
+            input_node, dim, index = node.args
+
+            shape = input_node.meta["val"].size()
+            # Negative dims count back from the last dimension.
+            dim = dim % len(shape) if dim < 0 else dim
+            # Negative indices wrap by the size of the selected dim, not the rank.
+            index = index % shape[dim] if index < 0 else index
+
+            with graph_module.graph.inserting_before(node):
+                slice_node = create_node(
+                    graph_module.graph, slice_op, (input_node, dim, index, index + 1)
+                )
+                squeeze_node = create_node(
+                    graph_module.graph, squeeze_op, (slice_node, [dim])
+                )
+
+            node.replace_all_uses_with(squeeze_node)
+            graph_module.graph.erase_node(node)
+
+        graph_module.graph.eliminate_dead_code()
+        graph_module.recompile()
+        graph_module = super().call(graph_module).graph_module
+        return PassResult(graph_module, True)
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
index e2fdc42b113..4ce95fda430 100644
--- a/backends/arm/arm_backend.py
+++ b/backends/arm/arm_backend.py
@@ -1,4 +1,4 @@
-# Copyright 2023-2024 Arm Limited and/or its affiliates.
+# Copyright 2023-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -49,8 +49,6 @@ def __init__(self):
         self.compiler_flags = []
         self.output_format = None
         self.path_for_intermediates = None
-        # TODO MLETORCH-265 Remove permute_nhwc flag
-        self.permute_nhwc = False
         self.quantize_io = False
         self.tosa_version = None
         self.input_order = None
@@ -118,16 +116,6 @@ def dump_intermediate_artifacts_to(
         self.path_for_intermediates = output_path
         return self
 
-    def set_permute_memory_format(
-        self, set_nhwc_permutation: bool = True
-    ) -> "ArmCompileSpecBuilder":
-        """
-        Permute to channel last in compiler and runtime. Compilation and
-        runtime will convert rank 4 inputs to channel last for each sub-graph.
-        """
-        self.permute_nhwc = set_nhwc_permutation
-        return self
-
     def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder":
         """
         Quantization of inputs and dequantization of outputs for cases where
@@ -170,11 +158,6 @@ def build(self) -> List[CompileSpec]:
                 CompileSpec("debug_artifact_path", self.path_for_intermediates.encode())
             )
 
-        if self.permute_nhwc:
-            self.compile_spec.append(
-                CompileSpec("permute_memory_format", "nhwc".encode())
-            )
-
         if self.input_order:
             self.compile_spec.append(
                 CompileSpec(
@@ -188,13 +171,6 @@ def build(self) -> List[CompileSpec]:
         return self.compile_spec
 
 
-def is_permute_memory(compile_spec: List[CompileSpec]) -> bool:
-    for spec in compile_spec:
-        if spec.key == "permute_memory_format":
-            return spec.value.decode() == "nhwc"
-    return False
-
-
 def is_tosa(compile_spec: List[CompileSpec]) -> bool:
     for spec in compile_spec:
         if spec.key == "output_format":
@@ -264,7 +240,7 @@ def preprocess(  # noqa: C901
         # const data directly. Path created and data written only in debug builds.
         tosa_graph = ts.TosaSerializer(artifact_path)
         graph_module = ArmPassManager().transform_to_backend_pipeline(
-            exported_program=edge_program, compile_spec=compile_spec
+            exported_program=edge_program
         )
 
         node_visitors = get_node_visitors(edge_program, tosa_spec)
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
index ee5f2807a9a..157e5ec0923 100644
--- a/backends/arm/operators/__init__.py
+++ b/backends/arm/operators/__init__.py
@@ -30,7 +30,6 @@
     op_repeat,
     op_rshift,
     op_rsqrt,
-    op_select,
     op_sigmoid,
     op_slice,
     op_squeeze,
diff --git a/backends/arm/operators/op_select.py b/backends/arm/operators/op_select.py
deleted file mode 100644
index b047a5dd47d..00000000000
--- a/backends/arm/operators/op_select.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# pyre-unsafe
-
-from typing import List
-
-import serializer.tosa_serializer as ts
-from executorch.backends.arm.operators.node_visitor import (
-    NodeVisitor,
-    register_node_visitor,
-)
-
-from executorch.backends.arm.tosa_mapping import TosaArg
-
-from executorch.backends.arm.tosa_utils import build_reshape, tosa_shape
-from serializer.tosa_serializer import TosaOp
-from torch.fx import Node
-
-
-@register_node_visitor
-class SelectVisitor(NodeVisitor):
-    target = "aten.select_copy.int"
-
-    def __init__(self, *args):
-        super().__init__(*args)
-
-    def define_node(
-        self,
-        node: Node,
-        tosa_graph: ts.TosaSerializer,
-        inputs: List[TosaArg],
-        output: TosaArg,
-    ) -> None:
-
-        assert len(inputs) == 3
-        input_node, dim, index = inputs
-        shape = input_node.shape
-        rank = len(shape)
-
-        dim = dim.number % rank if dim.number < 0 else dim.number
-        index = index.number % rank if index.number < 0 else index.number
-
-        # For aten.select_copy, the output will be rank[input_shape - 1]
-        # For TOSA rank(in) == rank(out).
-        # Add an intermediate with the same rank
-        expanded_shape = tuple(1 if i == dim else shape[i] for i in range(rank))
-        expanded_shape = tosa_shape(expanded_shape, input_node.dim_order)
-
-        output_reshaped = tosa_graph.addIntermediate(expanded_shape, output.dtype)
-
-        attr_slice = ts.TosaSerializerAttribute()
-
-        start_attr = [index if i == dim else 0 for i in input_node.dim_order]
-        size_attr = [
-            1 if i == dim else input_node.shape[i] for i in input_node.dim_order
-        ]
-
-        attr_slice.SliceAttribute(start_attr, size_attr)
-
-        tosa_graph.addOperator(
-            TosaOp.Op().SLICE, [input_node.name], [output_reshaped.name], attr_slice
-        )
-
-        # Reshape back to original rank of output.
-        build_reshape(tosa_graph, output_reshaped.name, output.shape, output.name)
diff --git a/backends/arm/runtime/ArmBackendEthosU.cpp b/backends/arm/runtime/ArmBackendEthosU.cpp
index 2cc716391bc..843e48603b6 100644
--- a/backends/arm/runtime/ArmBackendEthosU.cpp
+++ b/backends/arm/runtime/ArmBackendEthosU.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023-2024 Arm Limited and/or its affiliates.
+ * Copyright 2023-2025 Arm Limited and/or its affiliates.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
@@ -76,7 +76,6 @@ namespace arm {
 
 typedef struct {
   FreeableBuffer* processed;
-  bool permuted_io_flag;
 } ExecutionHandle;
 
 extern "C" {
@@ -125,14 +124,6 @@ class ArmBackend final : public ::executorch::runtime::BackendInterface {
         ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(allocator, ExecutionHandle);
     handle->processed = processed;
 
-    handle->permuted_io_flag = false;
-    for (auto& compile_spec : compile_specs) {
-      if (0 == std::strcmp(compile_spec.key, "permute_memory_format") &&
-          0 == std::memcmp(compile_spec.value.buffer, "nhwc", 4)) {
-        handle->permuted_io_flag = true;
-      }
-    }
-
     // Return the same buffer we were passed - this data will be
     // executed directly
     return handle;
@@ -225,11 +216,7 @@ class ArmBackend final : public ::executorch::runtime::BackendInterface {
       // which require permutation.
       bool permuted_input_shape;
       ET_CHECK_OK_OR_RETURN_ERROR(check_requires_permute(
-          i,
-          tensor_in,
-          &handles.inputs->io[i],
-          execution_handle->permuted_io_flag,
-          &permuted_input_shape));
+          i, tensor_in, &handles.inputs->io[i], &permuted_input_shape));
       bool both_char = tensor_in.scalar_type() == ScalarType::Char and
           handles.inputs->io[i].elem_size == 1;
       bool both_int = tensor_in.scalar_type() == ScalarType::Int and
@@ -330,11 +317,7 @@ class ArmBackend final : public ::executorch::runtime::BackendInterface {
 
       bool permuted_output_shape;
       ET_CHECK_OK_OR_RETURN_ERROR(check_requires_permute(
-          i,
-          tensor_out,
-          &handles.outputs->io[i],
-          execution_handle->permuted_io_flag,
-          &permuted_output_shape));
+          i, tensor_out, &handles.outputs->io[i], &permuted_output_shape));
       if (tensor_out.scalar_type() == ScalarType::Char and
           permuted_output_shape) {
         EXECUTORCH_PROF_SCOPE(
@@ -395,7 +378,6 @@ class ArmBackend final : public ::executorch::runtime::BackendInterface {
       int index,
       const executorch::aten::Tensor tensor,
       VelaIO* io,
-      bool permuted_io_flag,
       bool* is_permuted) const {
     bool permuted_shape = false;
 
@@ -409,12 +391,6 @@ class ArmBackend final : public ::executorch::runtime::BackendInterface {
       if (permuted_shape) {
         ET_LOG(Debug, "Tensor input/output %d will be permuted", index);
       }
-      if (permuted_io_flag != permuted_shape) {
-        ET_LOG(
-            Error,
-            "Permute compile flag and permuted input/output don't agree");
-        return Error::InvalidProgram;
-      }
     }
     *is_permuted = permuted_shape;
     return Error::Ok;
diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py
index 8838cb72d6c..ba80f1c2d7c 100644
--- a/backends/arm/test/common.py
+++ b/backends/arm/test/common.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -56,19 +56,15 @@ def maybe_get_tosa_collate_path() -> str | None:
     return None
 
 
-def get_tosa_compile_spec(
-    tosa_version: str, permute_memory_to_nhwc=True, custom_path=None
-) -> list[CompileSpec]:
+def get_tosa_compile_spec(tosa_version: str, custom_path=None) -> list[CompileSpec]:
     """
     Default compile spec for TOSA tests.
     """
-    return get_tosa_compile_spec_unbuilt(
-        tosa_version, permute_memory_to_nhwc, custom_path
-    ).build()
+    return get_tosa_compile_spec_unbuilt(tosa_version, custom_path).build()
 
 
 def get_tosa_compile_spec_unbuilt(
-    tosa_version: str, permute_memory_to_nhwc=False, custom_path=None
+    tosa_version: str, custom_path=None
 ) -> ArmCompileSpecBuilder:
     """Get the ArmCompileSpecBuilder for the default TOSA tests, to modify
     the compile spec before calling .build() to finalize it.
@@ -81,7 +77,6 @@ def get_tosa_compile_spec_unbuilt(
     compile_spec_builder = (
         ArmCompileSpecBuilder()
         .tosa_compile_spec(tosa_version)
-        .set_permute_memory_format(permute_memory_to_nhwc)
         .dump_intermediate_artifacts_to(custom_path)
     )
 
@@ -89,7 +84,6 @@ def get_tosa_compile_spec_unbuilt(
 
 
 def get_u55_compile_spec(
-    permute_memory_to_nhwc=True,
     quantize_io=False,
     custom_path=None,
     reorder_inputs=None,
@@ -98,7 +92,6 @@ def get_u55_compile_spec(
     Default compile spec for Ethos-U55 tests.
     """
     return get_u55_compile_spec_unbuilt(
-        permute_memory_to_nhwc,
         quantize_io=quantize_io,
         custom_path=custom_path,
         reorder_inputs=reorder_inputs,
@@ -106,7 +99,6 @@ def get_u55_compile_spec(
 
 
 def get_u85_compile_spec(
-    permute_memory_to_nhwc=True,
     quantize_io=False,
     custom_path=None,
     reorder_inputs=None,
@@ -115,7 +107,6 @@ def get_u85_compile_spec(
     Default compile spec for Ethos-U85 tests.
     """
     return get_u85_compile_spec_unbuilt(
-        permute_memory_to_nhwc,
         quantize_io=quantize_io,
         custom_path=custom_path,
         reorder_inputs=reorder_inputs,
@@ -123,7 +114,6 @@ def get_u85_compile_spec(
 
 
 def get_u55_compile_spec_unbuilt(
-    permute_memory_to_nhwc=True,
     quantize_io=False,
     custom_path=None,
     reorder_inputs=None,
@@ -143,7 +133,6 @@ def get_u55_compile_spec_unbuilt(
             extra_flags="--debug-force-regor --output-format=raw",
         )
         .set_quantize_io(is_option_enabled("quantize_io") or quantize_io)
-        .set_permute_memory_format(permute_memory_to_nhwc)
         .dump_intermediate_artifacts_to(artifact_path)
         .set_input_order(reorder_inputs)
     )
@@ -151,7 +140,6 @@ def get_u55_compile_spec_unbuilt(
 
 
 def get_u85_compile_spec_unbuilt(
-    permute_memory_to_nhwc=True,
     quantize_io=False,
     custom_path=None,
     reorder_inputs=None,
@@ -169,7 +157,6 @@ def get_u85_compile_spec_unbuilt(
             extra_flags="--output-format=raw",
         )
         .set_quantize_io(is_option_enabled("quantize_io") or quantize_io)
-        .set_permute_memory_format(permute_memory_to_nhwc)
         .dump_intermediate_artifacts_to(artifact_path)
         .set_input_order(reorder_inputs)
     )
diff --git a/backends/arm/test/misc/test_debug_feats.py b/backends/arm/test/misc/test_debug_feats.py
index b5ff882537b..b2fc271aded 100644
--- a/backends/arm/test/misc/test_debug_feats.py
+++ b/backends/arm/test/misc/test_debug_feats.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -111,7 +111,6 @@ def test_numerical_diff_prints(self):
                 example_inputs=model.get_inputs(),
                 compile_spec=common.get_tosa_compile_spec(
                     "TOSA-0.80+MI",
-                    permute_memory_to_nhwc=True,
                     custom_path=tempfile.mkdtemp("diff_print_test"),
                 ),
             )
diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py
index fca743a6fa2..d29695dedf3 100644
--- a/backends/arm/test/models/test_mobilenet_v2_arm.py
+++ b/backends/arm/test/models/test_mobilenet_v2_arm.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -56,9 +56,7 @@ def test_mv2_tosa_MI(self):
             ArmTester(
                 self.mv2,
                 example_inputs=self.model_inputs,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
-                ),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
             )
             .export()
             .to_edge_transform_and_lower(edge_compile_config=self._edge_compile_config)
@@ -71,9 +69,7 @@ def test_mv2_tosa_BI(self):
             ArmTester(
                 self.mv2,
                 example_inputs=self.model_inputs,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
-                ),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
             )
             .quantize()
             .export()
@@ -92,7 +88,7 @@ def test_mv2_u55_BI(self):
             ArmTester(
                 self.mv2,
                 example_inputs=self.model_inputs,
-                compile_spec=common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+                compile_spec=common.get_u55_compile_spec(),
             )
             .quantize()
             .export()
@@ -112,7 +108,7 @@ def test_mv2_u85_BI(self):
             ArmTester(
                 self.mv2,
                 example_inputs=self.model_inputs,
-                compile_spec=common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+                compile_spec=common.get_u85_compile_spec(),
             )
             .quantize()
             .export()
diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py
index 24faace0070..0aa3c6cba9e 100644
--- a/backends/arm/test/ops/test_add.py
+++ b/backends/arm/test/ops/test_add.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -137,7 +137,7 @@ def test_add_u55_BI(self, test_data: torch.Tensor):
         test_data = (test_data,)
         self._test_add_ethos_BI_pipeline(
             self.Add(),
-            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u55_compile_spec(),
             test_data,
         )
 
@@ -147,7 +147,7 @@ def test_add_u85_BI(self, test_data: torch.Tensor):
         test_data = (test_data,)
         self._test_add_ethos_BI_pipeline(
             self.Add(),
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
             test_data,
         )
 
diff --git a/backends/arm/test/ops/test_avg_pool.py b/backends/arm/test/ops/test_avg_pool.py
index 27629701c32..bc37fbb1364 100644
--- a/backends/arm/test/ops/test_avg_pool.py
+++ b/backends/arm/test/ops/test_avg_pool.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -57,9 +57,7 @@ def _test_avgpool2d_tosa_MI_pipeline(
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
-                ),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
             )
             .export()
             .check(["torch.ops.aten.avg_pool2d.default"])
@@ -81,7 +79,7 @@ def _test_avgpool2d_tosa_BI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+BI",
                 ),
             )
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
@@ -155,7 +153,7 @@ def test_avgpool2d_tosa_u55_BI(
     ):
         self._test_avgpool2d_tosa_ethos_BI_pipeline(
             self.AvgPool2d(*model_params),
-            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u55_compile_spec(),
             (test_data,),
         )
 
@@ -169,6 +167,6 @@ def test_avgpool2d_tosa_u85_BI(
     ):
         self._test_avgpool2d_tosa_ethos_BI_pipeline(
             self.AvgPool2d(*model_params),
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
             (test_data,),
         )
diff --git a/backends/arm/test/ops/test_bmm.py b/backends/arm/test/ops/test_bmm.py
index 0b830fa46bb..06470d91e82 100644
--- a/backends/arm/test/ops/test_bmm.py
+++ b/backends/arm/test/ops/test_bmm.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -159,7 +159,7 @@ def test_bmm_u55_BI_xfails(self, operand1: torch.Tensor, operand2: torch.Tensor)
             self.BMM(), common.get_u55_compile_spec(), test_data
         )
 
-    @parameterized.expand(BMM.test_parameters[:1])
+    @parameterized.expand(BMM.test_parameters)
     @pytest.mark.corstone_fvp
     def test_bmm_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
         test_data = (operand1, operand2)
@@ -167,15 +167,6 @@ def test_bmm_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
             self.BMM(), common.get_u85_compile_spec(), test_data
         )
 
-    @parameterized.expand(BMM.test_parameters[1:])
-    @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
-    def test_bmm_u85_BI_xfails(self, operand1: torch.Tensor, operand2: torch.Tensor):
-        test_data = (operand1, operand2)
-        self._test_bmm_ethosu_BI_pipeline(
-            self.BMM(), common.get_u85_compile_spec(), test_data
-        )
-
     # Expected to fail with error: Warning, unsupported fusing of TOSA Rescale previous operator is of type: Memcpy
     @parameterized.expand(BMMSingleInput.test_parameters)
     @pytest.mark.corstone_fvp
diff --git a/backends/arm/test/ops/test_conv1d.py b/backends/arm/test/ops/test_conv1d.py
index 593260ac56f..b754a91f36f 100644
--- a/backends/arm/test/ops/test_conv1d.py
+++ b/backends/arm/test/ops/test_conv1d.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -228,7 +228,7 @@ def _test_conv1d_tosa_MI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+MI",
                 ),
             )
             .export()
@@ -250,7 +250,7 @@ def _test_conv1d_tosa_BI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+BI",
                 ),
             )
             .quantize()
@@ -291,18 +291,13 @@ def test_conv1d_tosa_MI(self, test_name, model):
     def test_conv1d_tosa_BI(self, test_name, model):
         self._test_conv1d_tosa_BI_pipeline(model, model.get_inputs())
 
-    # Expeted to fail as Conv1D requires transpoes which isn't supported on u55
     @parameterized.expand(testsuite)
     @pytest.mark.corstone_fvp
-    @unittest.expectedFailure
     def test_conv1d_u55_BI(self, test_name, model):
         self._test_conv1d_ethosu_BI_pipeline(
             model, common.get_u55_compile_spec(), model.get_inputs()
         )
 
-    # This specific test case has numerical errors on FVP, MLETORCH-520.
-    testsuite.remove(("5_3x2x128_st1", conv1d_5_3x2x128_st1))
-
     @parameterized.expand(testsuite)
     @pytest.mark.corstone_fvp
     def test_conv1d_u85_BI(self, test_name, model):
diff --git a/backends/arm/test/ops/test_conv2d.py b/backends/arm/test/ops/test_conv2d.py
index 9ccac539408..bbcb421ce7a 100644
--- a/backends/arm/test/ops/test_conv2d.py
+++ b/backends/arm/test/ops/test_conv2d.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -255,7 +255,7 @@ def _test_conv2d_tosa_MI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+MI",
                 ),
             )
             .export()
@@ -277,7 +277,7 @@ def _test_conv2d_tosa_BI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+BI",
                 ),
             )
             .quantize()
@@ -330,7 +330,7 @@ def test_conv2d_tosa_BI(self, test_name, model):
     @pytest.mark.corstone_fvp
     def test_conv2d_u55_BI(self, test_name, model):
         self._test_conv2d_ethosu_BI_pipeline(
-            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u55_compile_spec(),
             model,
             model.get_inputs(),
         )
@@ -339,7 +339,7 @@ def test_conv2d_u55_BI(self, test_name, model):
     @pytest.mark.corstone_fvp
     def test_conv2d_u85_BI(self, test_name, model):
         self._test_conv2d_ethosu_BI_pipeline(
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
             model,
             model.get_inputs(),
         )
diff --git a/backends/arm/test/ops/test_conv_combos.py b/backends/arm/test/ops/test_conv_combos.py
index 4a5615f97c6..8352727a1c3 100644
--- a/backends/arm/test/ops/test_conv_combos.py
+++ b/backends/arm/test/ops/test_conv_combos.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -194,7 +194,7 @@ def _test_conv_combo_tosa_MI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+MI",
                 ),
             )
             .export()
@@ -218,7 +218,7 @@ def _test_conv_combo_tosa_BI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+BI",
                 ),
             )
             .quantize()
@@ -273,7 +273,7 @@ def test_conv_meandim_u55_BI(self):
         model = ComboConv2dMeandim()
         self._test_conv_combo_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u55_compile_spec(),
             model.get_inputs(),
         )
 
@@ -282,7 +282,7 @@ def test_conv_meandim_u85_BI(self):
         model = ComboConv2dMeandim()
         self._test_conv_combo_ethos_BI_pipeline(
             model,
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
             model.get_inputs(),
         )
 
@@ -334,7 +334,7 @@ def test_conv_relu6_u55_BI(self, test_data: torch.Tensor):
         model = ComboConvRelu6()
         test_data = (test_data,)
         self._test_conv_combo_ethos_BI_pipeline(
-            model, common.get_u55_compile_spec(permute_memory_to_nhwc=True), test_data
+            model, common.get_u55_compile_spec(), test_data
         )
 
     @parameterized.expand(ComboConvRelu6.test_data)
@@ -343,7 +343,7 @@ def test_conv_relu6_u85_BI(self, test_data: torch.Tensor):
         model = ComboConvRelu6()
         test_data = (test_data,)
         self._test_conv_combo_ethos_BI_pipeline(
-            model, common.get_u85_compile_spec(permute_memory_to_nhwc=True), test_data
+            model, common.get_u85_compile_spec(), test_data
         )
 
     ###############################
@@ -364,7 +364,7 @@ def test_block_bottleneck_residual_u55_BI(self):
         model = ComboBlockBottleneckResidual()
         self._test_conv_combo_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u55_compile_spec(),
             model.get_inputs(),
         )
 
@@ -373,7 +373,7 @@ def test_block_bottleneck_residual_u85_BI(self):
         model = ComboBlockBottleneckResidual()
         self._test_conv_combo_ethos_BI_pipeline(
             model,
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
             model.get_inputs(),
         )
 
diff --git a/backends/arm/test/ops/test_depthwise_conv.py b/backends/arm/test/ops/test_depthwise_conv.py
index 3ce75840864..e183dcc9c6b 100644
--- a/backends/arm/test/ops/test_depthwise_conv.py
+++ b/backends/arm/test/ops/test_depthwise_conv.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -152,9 +152,9 @@
 testsuite_conv2d = [
     ("2x2_1x6x4x4_gp6_st1", dw_conv2d_2x2_1x6x4x4_gp6_st1),
     ("3x3_1x3x256x256_gp3_st1", dw_conv2d_3x3_1x3x256x256_gp3_st1),
+    ("3x3_1x4x256x256_gp4_nobias", dw_conv2d_3x3_1x4x256x256_gp4_nobias),
     ("3x3_1x4x256x256_gp4_st1", dw_conv2d_3x3_1x4x256x256_gp4_st1),
     ("3x3_2x8x198x198_gp8_st3", dw_conv2d_3x3_2x8x198x198_gp8_st3),
-    ("3x3_1x4x256x256_gp4_nobias", dw_conv2d_3x3_1x4x256x256_gp4_nobias),
     ("two_dw_conv2d", two_dw_conv2d),
 ]
 
@@ -191,7 +191,7 @@ def _test_dw_conv_tosa_MI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+MI",
                 ),
             )
             .export()
@@ -211,7 +211,7 @@ def _test_dw_conv_tosa_BI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+BI",
                 ),
             )
             .quantize()
@@ -257,37 +257,37 @@ def test_dw_conv_tosa_MI(self, test_name: str, model: torch.nn.Module):
     def test_dw_conv_tosa_BI(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_tosa_BI_pipeline(model, model.get_inputs())
 
-    testsuite_conv2d.remove(
-        ("3x3_1x3x256x256_gp3_st1", dw_conv2d_3x3_1x3x256x256_gp3_st1)
-    )  # Works
-
-    @parameterized.expand(testsuite_conv2d, skip_on_empty=True)
+    @parameterized.expand(testsuite_conv2d[:4], skip_on_empty=True)
     @pytest.mark.corstone_fvp
-    @unittest.expectedFailure
     def test_dw_conv2d_u55_BI(
         self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False
     ):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(
-                permute_memory_to_nhwc=True, quantize_io=set_quantize_io
-            ),
+            common.get_u55_compile_spec(quantize_io=set_quantize_io),
+            model.get_inputs(),
+        )
+
+    @parameterized.expand(testsuite_conv2d[4:], skip_on_empty=True)
+    @pytest.mark.corstone_fvp
+    @unittest.expectedFailure  # TODO: MLETORCH-516
+    def test_dw_conv2d_u55_BI_xfails(
+        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False
+    ):
+        self._test_dw_conv_ethos_BI_pipeline(
+            model,
+            common.get_u55_compile_spec(quantize_io=set_quantize_io),
             model.get_inputs(),
         )
 
-    # Expected to fail as conv1d needs transpose which is not supported
-    # on u55.
     @parameterized.expand(testsuite_conv1d, skip_on_empty=True)
     @pytest.mark.corstone_fvp
-    @unittest.expectedFailure
     def test_dw_conv1d_u55_BI(
         self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False
     ):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(
-                permute_memory_to_nhwc=True, quantize_io=set_quantize_io
-            ),
+            common.get_u55_compile_spec(quantize_io=set_quantize_io),
             model.get_inputs(),
         )
 
@@ -298,9 +298,7 @@ def test_dw_conv_u85_BI(
     ):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u85_compile_spec(
-                permute_memory_to_nhwc=True, quantize_io=set_quantize_io
-            ),
+            common.get_u85_compile_spec(quantize_io=set_quantize_io),
             model.get_inputs(),
         )
 
@@ -313,8 +311,6 @@ def test_dw_conv_u85_BI_xfails(
     ):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u85_compile_spec(
-                permute_memory_to_nhwc=True, quantize_io=set_quantize_io
-            ),
+            common.get_u85_compile_spec(quantize_io=set_quantize_io),
             model.get_inputs(),
         )
diff --git a/backends/arm/test/ops/test_div.py b/backends/arm/test/ops/test_div.py
index d5f61744692..062dbfacaef 100644
--- a/backends/arm/test/ops/test_div.py
+++ b/backends/arm/test/ops/test_div.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -185,7 +185,7 @@ def test_div_tosa_BI(
         test_data = (input_, other_)
         self._test_div_tosa_BI_pipeline(self.Div(), test_data)
 
-    @parameterized.expand(test_data_suite[:2])
+    @parameterized.expand(test_data_suite[:3])
     @pytest.mark.corstone_fvp
     def test_div_u55_BI(
         self,
@@ -200,7 +200,7 @@ def test_div_u55_BI(
         )
 
     # Numerical issues on FVP likely due to mul op, MLETORCH-521
-    @parameterized.expand(test_data_suite[2:])
+    @parameterized.expand(test_data_suite[3:])
     @pytest.mark.corstone_fvp
     @conftest.expectedFailureOnFVP
     def test_div_u55_BI_xfails(
@@ -215,7 +215,7 @@ def test_div_u55_BI_xfails(
             self.Div(), common.get_u55_compile_spec(), test_data
         )
 
-    @parameterized.expand(test_data_suite[:2])
+    @parameterized.expand(test_data_suite[:3])
     @pytest.mark.corstone_fvp
     def test_div_u85_BI(
         self,
@@ -230,7 +230,7 @@ def test_div_u85_BI(
         )
 
     # Numerical issues on FVP likely due to mul op, MLETORCH-521
-    @parameterized.expand(test_data_suite[2:])
+    @parameterized.expand(test_data_suite[3:])
     @pytest.mark.corstone_fvp
     @conftest.expectedFailureOnFVP
     def test_div_u85_BI_xfails(
diff --git a/backends/arm/test/ops/test_full.py b/backends/arm/test/ops/test_full.py
index 1b6d6e6ae39..fc82fa4dd71 100644
--- a/backends/arm/test/ops/test_full.py
+++ b/backends/arm/test/ops/test_full.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -76,15 +76,12 @@ def _test_full_tosa_BI_pipeline(
         self,
         module: torch.nn.Module,
         test_data: Tuple,
-        permute_memory_to_nhwc: bool,
     ):
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=permute_memory_to_nhwc
-                ),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
             )
             .quantize()
             .export()
@@ -134,7 +131,7 @@ def test_const_full_tosa_MI(self):
 
     def test_const_full_nhwc_tosa_BI(self):
         _input = torch.rand((2, 2, 3, 3)) * 10
-        self._test_full_tosa_BI_pipeline(self.AddConstFull(), (_input,), True)
+        self._test_full_tosa_BI_pipeline(self.AddConstFull(), (_input,))
 
     @parameterized.expand(AddVariableFull.test_parameters)
     def test_full_tosa_MI(self, test_tensor: Tuple):
@@ -144,7 +141,7 @@ def test_full_tosa_MI(self, test_tensor: Tuple):
 
     @parameterized.expand(AddVariableFull.test_parameters)
     def test_full_tosa_BI(self, test_tensor: Tuple):
-        self._test_full_tosa_BI_pipeline(self.AddVariableFull(), test_tensor, False)
+        self._test_full_tosa_BI_pipeline(self.AddVariableFull(), test_tensor)
 
     # Mismatch in provided number of inputs and model signature, MLETORCH 519
     @parameterized.expand(AddVariableFull.test_parameters)
diff --git a/backends/arm/test/ops/test_layer_norm.py b/backends/arm/test/ops/test_layer_norm.py
index 2d88421fb56..0570afc03e7 100644
--- a/backends/arm/test/ops/test_layer_norm.py
+++ b/backends/arm/test/ops/test_layer_norm.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -78,7 +78,7 @@ def _test_layernorm_tosa_MI_pipeline(
                 model=module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+MI",
                 ),
             )
             .export()
@@ -99,7 +99,7 @@ def _test_layernorm_tosa_BI_pipeline(
                 model=module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+BI",
                 ),
             )
             .quantize()
@@ -158,9 +158,21 @@ def test_layer_norm_tosa_BI(
             self.LayerNorm(*model_params), (test_data,)
         )
 
+    @parameterized.expand(test_data_suite[4:])
+    @pytest.mark.corstone_fvp
+    def test_layer_norm_u55_BI(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params,
+    ):
+        self._test_layernorm_ethosu_BI_pipeline(
+            self.LayerNorm(*model_params), common.get_u55_compile_spec(), (test_data,)
+        )
+
     # Numerical issues on FVP likely due to mul op, MLETORCH-521
     # Skip tests that require transposes.
-    @parameterized.expand(test_data_suite)
+    @parameterized.expand(test_data_suite[:4])
     @pytest.mark.corstone_fvp
     @unittest.expectedFailure
     def test_layer_norm_u55_BI_xfails(
diff --git a/backends/arm/test/ops/test_linear.py b/backends/arm/test/ops/test_linear.py
index cd14b7801d2..825b2f9bc93 100644
--- a/backends/arm/test/ops/test_linear.py
+++ b/backends/arm/test/ops/test_linear.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -137,7 +137,7 @@ def _test_linear_tosa_MI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+MI",
                 ),
             )
             .export()
@@ -157,7 +157,7 @@ def _test_linear_tosa_BI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+BI",
                 ),
             )
             .quantize()
diff --git a/backends/arm/test/ops/test_logsoftmax.py b/backends/arm/test/ops/test_logsoftmax.py
index 69c8ee06ecf..d1581423a0a 100644
--- a/backends/arm/test/ops/test_logsoftmax.py
+++ b/backends/arm/test/ops/test_logsoftmax.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -31,10 +31,6 @@
     ("ones", torch.ones(10, 10), 1),
     ("ones_neg_dim", torch.ones(10, 3, 4), -1),
     ("randn_neg_dim", torch.randn(10, 5, 8, 7), -3),
-]
-
-test_data_suite_u55_xfails = [
-    # (test_name, test_data, dim)
     ("zeros", torch.zeros(10, 8, 5, 2), 0),
     ("zeros_neg_dim", torch.zeros(10, 7, 8, 9), -4),
     ("rand", torch.rand(1, 2, 5, 8), 2),
@@ -161,19 +157,6 @@ def test_logsoftmax_tosa_u55_BI(
             self.LogSoftmax(dim=dim), (test_data,)
         )
 
-    # Expected to fail as this is not supported on u55.
-    @parameterized.expand(test_data_suite_u55_xfails)
-    @unittest.expectedFailure
-    def test_logsoftmax_tosa_u55_BI_xfails(
-        self,
-        test_name: str,
-        test_data: torch.Tensor,
-        dim: int,
-    ):
-        self._test_logsoftmax_tosa_u55_BI_pipeline(
-            self.LogSoftmax(dim=dim), (test_data,)
-        )
-
     @parameterized.expand(test_data_suite)
     def test_logsoftmax_tosa_u85_BI(
         self,
diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py
index a693c7d5497..81f27beab45 100644
--- a/backends/arm/test/ops/test_max_pool.py
+++ b/backends/arm/test/ops/test_max_pool.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -65,7 +65,7 @@ def _test_maxpool2d_tosa_MI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+MI",
                 ),
             )
             .export()
@@ -92,7 +92,7 @@ def _test_maxpool2d_tosa_BI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+BI",
                 ),
             )
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
@@ -171,7 +171,7 @@ def test_maxpool2d_tosa_u55_BI(
     ):
         tester = self._test_maxpool2d_tosa_ethos_BI_pipeline(
             self.MaxPool2d(*model_params),
-            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u55_compile_spec(),
             (test_data,),
         )
         if conftest.is_option_enabled("corstone_fvp"):
@@ -189,7 +189,7 @@ def test_maxpool2d_tosa_u85_BI(
     ):
         tester = self._test_maxpool2d_tosa_ethos_BI_pipeline(
             self.MaxPool2d(*model_params),
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
             (test_data,),
         )
         if conftest.is_option_enabled("corstone_fvp"):
@@ -230,7 +230,7 @@ def test_maxpool2d_tosa_u55_BI_mult_batches(
     ):
         tester = self._test_maxpool2d_tosa_ethos_BI_pipeline(
             self.MaxPool2d(*model_params),
-            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u55_compile_spec(),
             (test_data,),
         )
         if conftest.is_option_enabled("corstone_fvp"):
@@ -249,7 +249,7 @@ def test_maxpool2d_tosa_u85_BI_mult_batches(
     ):
         tester = self._test_maxpool2d_tosa_ethos_BI_pipeline(
             self.MaxPool2d(*model_params),
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
             (test_data,),
         )
         if conftest.is_option_enabled("corstone_fvp"):
diff --git a/backends/arm/test/ops/test_mean_dim.py b/backends/arm/test/ops/test_mean_dim.py
index e4f6afcbd63..393cf1667e0 100644
--- a/backends/arm/test/ops/test_mean_dim.py
+++ b/backends/arm/test/ops/test_mean_dim.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -269,10 +269,8 @@ def test_meandim_tosa_BI(
     ):
         self._test_meandim_tosa_BI_pipeline(self.MeanDim(dim, keepdim), (test_data,))
 
-    # Expected to fail as this is not supported on u55.
     @parameterized.expand(MeanDim.test_data_suite)
-    @unittest.expectedFailure
-    def test_meandim_tosa_u55_BI_xfails(
+    def test_meandim_tosa_u55_BI(
         self,
         test_name: str,
         test_data: torch.Tensor,
diff --git a/backends/arm/test/ops/test_mul.py b/backends/arm/test/ops/test_mul.py
index 9d789a8e336..715673b87c8 100644
--- a/backends/arm/test/ops/test_mul.py
+++ b/backends/arm/test/ops/test_mul.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -73,7 +73,7 @@ def _test_mul_tosa_MI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+MI",
                 ),
             )
             .export()
@@ -94,7 +94,7 @@ def _test_mul_tosa_BI_pipeline(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=True
+                    "TOSA-0.80+BI",
                 ),
             )
             .quantize()
diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py
index b373af1401b..ec7ecaa81b3 100644
--- a/backends/arm/test/ops/test_permute.py
+++ b/backends/arm/test/ops/test_permute.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -53,15 +53,12 @@ def _test_permute_tosa_MI_pipeline(
         self,
         module: torch.nn.Module,
         test_data: Tuple[torch.tensor],
-        permute_memory_to_nhwc: bool,
     ):
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=permute_memory_to_nhwc
-                ),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
             )
             .export()
             .check(["torch.ops.aten.permute.default"])
@@ -127,10 +124,7 @@ def _test_permute_ethos_BI_pipeline(
     def test_permute_tosa_MI(
         self, test_name: str, test_data: torch.Tensor, dims: list[int]
     ):
-        self._test_permute_tosa_MI_pipeline(self.Permute(dims=dims), (test_data,), True)
-        self._test_permute_tosa_MI_pipeline(
-            self.Permute(dims=dims), (test_data,), False
-        )
+        self._test_permute_tosa_MI_pipeline(self.Permute(dims=dims), (test_data,))
 
     @parameterized.expand(test_data_suite)
     def test_permute_tosa_BI(
@@ -141,7 +135,5 @@ def test_permute_tosa_BI(
-    # Expected to fail as TOSA.Transpose is not supported by Ethos-U55.
     @parameterized.expand(test_data_suite[0:1])
     @pytest.mark.corstone_fvp
-    @unittest.expectedFailure
     def test_permute_u55_BI(
         self, test_name: str, test_data: torch.Tensor, dims: list[int]
     ):
diff --git a/backends/arm/test/ops/test_repeat.py b/backends/arm/test/ops/test_repeat.py
index f43f7af13c3..bad872792be 100644
--- a/backends/arm/test/ops/test_repeat.py
+++ b/backends/arm/test/ops/test_repeat.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -107,20 +107,12 @@ def test_repeat_tosa_MI(self, test_input, multiples):
     def test_repeat_tosa_BI(self, test_input, multiples):
         self._test_repeat_tosa_BI_pipeline(self.Repeat(), (test_input, multiples))
 
-    @parameterized.expand(Repeat.test_parameters[:-1])
+    @parameterized.expand(Repeat.test_parameters)
     def test_repeat_u55_BI(self, test_input, multiples):
         self._test_repeat_ethosu_pipeline(
             common.get_u55_compile_spec(), self.Repeat(), (test_input, multiples)
         )
 
-    # Final test requires transpose which is not supported on u55.
-    @parameterized.expand(Repeat.test_parameters[-1:])
-    @unittest.expectedFailure
-    def test_repeat_u55_BI_xfails(self, test_input, multiples):
-        self._test_repeat_ethosu_pipeline(
-            common.get_u55_compile_spec(), self.Repeat(), (test_input, multiples)
-        )
-
     @parameterized.expand(Repeat.test_parameters)
     def test_repeat_u85_BI(self, test_input, multiples):
         self._test_repeat_ethosu_pipeline(
diff --git a/backends/arm/test/ops/test_select.py b/backends/arm/test/ops/test_select.py
index c39b20a7318..b474da573f0 100644
--- a/backends/arm/test/ops/test_select.py
+++ b/backends/arm/test/ops/test_select.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -51,21 +51,17 @@ def _test_select_tosa_MI_pipeline(
         test_data: test_data_t,
         export_target: str,
     ):
-        # For 4D tensors, do not permute to NHWC
-        permute = False if len(test_data[0].shape) == 4 else True
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+MI", permute_memory_to_nhwc=permute
-                ),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
             )
             .export()
             .check([export_target])
             .check_not(["torch.ops.quantized_decomposed"])
             .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
             .run_method_and_compare_outputs(inputs=test_data)
@@ -77,15 +73,11 @@ def _test_select_tosa_BI_pipeline(
         test_data: test_data_t,
         export_target: str,
     ):
-        # For 4D tensors, do not permute to NHWC
-        permute = False if len(test_data[0].shape) == 4 else True
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=permute
-                ),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
             )
             .quantize()
             .export()
@@ -124,10 +116,8 @@ def _test_select_ethos_BI_pipeline(
     def _test_select_tosa_u55_BI_pipeline(
         self, module: torch.nn.Module, test_data: test_data_t, export_target: str
     ):
-        # For 4D tensors, do not permute to NHWC
-        permute = False if len(test_data[0].shape) == 4 else True
         self._test_select_ethos_BI_pipeline(
-            common.get_u55_compile_spec(permute_memory_to_nhwc=permute),
+            common.get_u55_compile_spec(),
             module,
             test_data,
             export_target,
@@ -136,10 +126,8 @@ def _test_select_tosa_u55_BI_pipeline(
     def _test_select_tosa_u85_BI_pipeline(
         self, module: torch.nn.Module, test_data: test_data_t, export_target: str
     ):
-        # For 4D tensors, do not permute to NHWC
-        permute = False if len(test_data[0].shape) == 4 else True
         self._test_select_ethos_BI_pipeline(
-            common.get_u85_compile_spec(permute_memory_to_nhwc=permute),
+            common.get_u85_compile_spec(),
             module,
             test_data,
             export_target,
diff --git a/backends/arm/test/ops/test_slice.py b/backends/arm/test/ops/test_slice.py
index 511873a8c21..7cb82e3a828 100644
--- a/backends/arm/test/ops/test_slice.py
+++ b/backends/arm/test/ops/test_slice.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -52,16 +52,14 @@ def _test_slice_tosa_MI_pipeline(
         )
 
     def _test_slice_tosa_BI_pipeline(
-        self, module: torch.nn.Module, test_data: Tuple[torch.Tensor], permute: bool
+        self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
     ):
 
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI", permute_memory_to_nhwc=permute
-                ),
+                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
             )
             .quantize()
             .export()
@@ -114,11 +112,11 @@ def test_slice_tosa_MI(self, tensor):
 
     @parameterized.expand(Slice.test_tensors[:2])
     def test_slice_nchw_tosa_BI(self, test_tensor: torch.Tensor):
-        self._test_slice_tosa_BI_pipeline(self.Slice(), (test_tensor,), False)
+        self._test_slice_tosa_BI_pipeline(self.Slice(), (test_tensor,))
 
     @parameterized.expand(Slice.test_tensors[2:])
     def test_slice_nhwc_tosa_BI(self, test_tensor: torch.Tensor):
-        self._test_slice_tosa_BI_pipeline(self.Slice(), (test_tensor,), True)
+        self._test_slice_tosa_BI_pipeline(self.Slice(), (test_tensor,))
 
     @parameterized.expand(Slice.test_tensors)
     def test_slice_u55_BI(self, test_tensor: torch.Tensor):
diff --git a/backends/arm/test/ops/test_softmax.py b/backends/arm/test/ops/test_softmax.py
index fd78d1a9acf..794f6b791f7 100644
--- a/backends/arm/test/ops/test_softmax.py
+++ b/backends/arm/test/ops/test_softmax.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -33,10 +33,6 @@
     ("ones", torch.ones(10, 10), 1),
     ("ones_neg_dim", torch.ones(10, 3, 4), -1),
     ("randn_neg_dim", torch.randn(10, 5, 8, 7), -3),
-]
-
-test_data_suite_u55_xfails = [
-    # (test_name, test_data, dim)
     ("zeros", torch.zeros(10, 8, 5, 2), 0),
     ("zeros_neg_dim", torch.zeros(10, 7, 8, 9), -4),
     ("rand", torch.rand(1, 2, 5, 8), 2),
@@ -161,17 +157,6 @@ def test_softmax_tosa_u55_BI(
     ):
         self._test_softmax_tosa_u55_BI_pipeline(self.Softmax(dim=dim), (test_data,))
 
-    # Expected to fail as this is not supported on u55.
-    @parameterized.expand(test_data_suite_u55_xfails)
-    @unittest.expectedFailure
-    def test_softmax_tosa_u55_BI_xfails(
-        self,
-        test_name: str,
-        test_data: torch.Tensor,
-        dim: int,
-    ):
-        self._test_softmax_tosa_u55_BI_pipeline(self.Softmax(dim=dim), (test_data,))
-
     @parameterized.expand(test_data_suite)
     def test_softmax_tosa_u85_BI(
         self,
diff --git a/backends/arm/test/ops/test_squeeze.py b/backends/arm/test/ops/test_squeeze.py
index ac26fd73fac..9f02392e1e2 100644
--- a/backends/arm/test/ops/test_squeeze.py
+++ b/backends/arm/test/ops/test_squeeze.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -136,7 +136,7 @@ def test_squeeze_u55_BI(
         test_tensor: torch.Tensor,
     ):
         self._test_squeeze_ethosu_BI_pipeline(
-            common.get_u55_compile_spec(permute_memory_to_nhwc=False),
+            common.get_u55_compile_spec(),
             self.Squeeze(),
             (test_tensor,),
             "torch.ops.aten.squeeze.default",
@@ -148,7 +148,7 @@ def test_squeeze_u85_BI(
         test_tensor: torch.Tensor,
     ):
         self._test_squeeze_ethosu_BI_pipeline(
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
             self.Squeeze(),
             (test_tensor,),
             "torch.ops.aten.squeeze.default",
@@ -169,7 +169,7 @@ def test_squeeze_dim_tosa_BI(self, test_tensor: torch.Tensor, dim: int):
     @parameterized.expand(SqueezeDim.test_parameters)
     def test_squeeze_dim_u55_BI(self, test_tensor: torch.Tensor, dim: int):
         self._test_squeeze_ethosu_BI_pipeline(
-            common.get_u55_compile_spec(permute_memory_to_nhwc=False),
+            common.get_u55_compile_spec(),
             self.SqueezeDim(),
             (test_tensor, dim),
             "torch.ops.aten.squeeze.dim",
@@ -178,7 +178,7 @@ def test_squeeze_dim_u55_BI(self, test_tensor: torch.Tensor, dim: int):
     @parameterized.expand(SqueezeDim.test_parameters)
     def test_squeeze_dim_u85_BI(self, test_tensor: torch.Tensor, dim: int):
         self._test_squeeze_ethosu_BI_pipeline(
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
             self.SqueezeDim(),
             (test_tensor, dim),
             "torch.ops.aten.squeeze.dim",
@@ -199,7 +199,7 @@ def test_squeeze_dims_tosa_BI(self, test_tensor: torch.Tensor, dims: tuple[int])
     @parameterized.expand(SqueezeDims.test_parameters)
     def test_squeeze_dims_u55_BI(self, test_tensor: torch.Tensor, dims: tuple[int]):
         self._test_squeeze_ethosu_BI_pipeline(
-            common.get_u55_compile_spec(permute_memory_to_nhwc=False),
+            common.get_u55_compile_spec(),
             self.SqueezeDims(),
             (test_tensor, dims),
             "torch.ops.aten.squeeze.dims",
diff --git a/backends/arm/test/ops/test_sum.py b/backends/arm/test/ops/test_sum.py
index 098e0fd1bc2..7f85cba4c39 100644
--- a/backends/arm/test/ops/test_sum.py
+++ b/backends/arm/test/ops/test_sum.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -39,9 +39,6 @@ class Sum(torch.nn.Module):
             ((torch.rand(10), 0, True),),
             ((torch.rand(10, 10), 1, False),),
             ((torch.rand(1, 2, 3, 4), 3, True),),
-        ]
-
-        test_parameters_u55_xfails: list[Tuple[exampledata_t]] = [
             ((torch.rand(10, 10, 10), [-3, 1], True),),
             ((torch.rand(2, 1, 5, 8), 1, False),),
             ((torch.rand(1, 2, 8, 8), [2, 3, 0], True),),
@@ -129,17 +126,7 @@ def test_sum_u55_BI(self, test_data: tuple[exampledata_t]):
         self._test_sum_ethosu_BI_pipeline(
             self.Sum(),
             test_data,
-            common.get_u55_compile_spec(permute_memory_to_nhwc=False),
-        )
-
-    # Expected to fail as this is not supported on u55.
-    @parameterized.expand(Sum.test_parameters_u55_xfails)
-    @unittest.expectedFailure
-    def test_sum_u55_BI_xfails(self, test_data: tuple[exampledata_t]):
-        self._test_sum_ethosu_BI_pipeline(
-            self.Sum(),
-            test_data,
-            common.get_u55_compile_spec(permute_memory_to_nhwc=False),
+            common.get_u55_compile_spec(),
         )
 
     @parameterized.expand(Sum.test_parameters)
@@ -147,5 +134,5 @@ def test_sum_u85_BI(self, test_data: tuple[exampledata_t]):
         self._test_sum_ethosu_BI_pipeline(
             self.Sum(),
             test_data,
-            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            common.get_u85_compile_spec(),
         )
diff --git a/backends/arm/test/ops/test_unsqueeze.py b/backends/arm/test/ops/test_unsqueeze.py
index a6faf70af05..68f4fe46123 100644
--- a/backends/arm/test/ops/test_unsqueeze.py
+++ b/backends/arm/test/ops/test_unsqueeze.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -95,7 +95,7 @@ def test_unsqueeze_tosa_MI(self, test_tensor: torch.Tensor):
     def test_unsqueeze_tosa_BI(self, test_tensor: torch.Tensor):
         self._test_unsqueeze_tosa_BI_pipeline(self.Unsqueeze(), (test_tensor, 0))
 
-    @parameterized.expand(Unsqueeze.test_parameters[:-1])
+    @parameterized.expand(Unsqueeze.test_parameters)
     def test_unsqueeze_u55_BI(self, test_tensor: torch.Tensor):
         self._test_unsqueeze_ethosu_BI_pipeline(
             common.get_u55_compile_spec(),
diff --git a/backends/arm/test/ops/test_var.py b/backends/arm/test/ops/test_var.py
index 322ac5b0edd..e1fed058177 100644
--- a/backends/arm/test/ops/test_var.py
+++ b/backends/arm/test/ops/test_var.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -158,10 +158,8 @@ def test_var_tosa_MI(self, test_tensor: torch.Tensor, keepdim, correction):
     def test_var_tosa_BI(self, test_tensor: torch.Tensor, keepdim, correction):
         self._test_var_tosa_BI_pipeline(self.Var(), (test_tensor, keepdim, correction))
 
-    # Expected to fail as this is not supported on u55.
     @parameterized.expand(Var.test_parameters)
-    @unittest.expectedFailure
-    def test_var_u55_BI_xfails(self, test_tensor: torch.Tensor, keepdim, correction):
+    def test_var_u55_BI(self, test_tensor: torch.Tensor, keepdim, correction):
         self._test_var_ethosu_BI_pipeline(
             self.Var(),
             common.get_u55_compile_spec(),
@@ -196,18 +194,6 @@ def test_var_dim_u55_BI(self, test_tensor: torch.Tensor, dim, keepdim, correctio
             (test_tensor, dim, keepdim, correction),
         )
 
-    # Expected to fail as this is not supported on u55.
-    @parameterized.expand(VarDim.test_parameters_u55_xfails)
-    @unittest.expectedFailure
-    def test_var_dim_u55_BI_xfails(
-        self, test_tensor: torch.Tensor, dim, keepdim, correction
-    ):
-        self._test_var_ethosu_BI_pipeline(
-            self.VarDim(),
-            common.get_u55_compile_spec(),
-            (test_tensor, dim, keepdim, correction),
-        )
-
     @parameterized.expand(VarDim.test_parameters)
     def test_var_dim_u85_BI(self, test_tensor: torch.Tensor, dim, keepdim, correction):
         self._test_var_ethosu_BI_pipeline(
@@ -232,10 +218,8 @@ def test_var_correction_tosa_BI(
             self.VarCorrection(), (test_tensor, dim, keepdim, correction)
         )
 
-    # Expected to fail as this is not supported on u55.
     @parameterized.expand(VarCorrection.test_parameters)
-    @unittest.expectedFailure
-    def test_var_correction_u55_BI_xfails(
+    def test_var_correction_u55_BI(
         self, test_tensor: torch.Tensor, dim, keepdim, correction
     ):
         self._test_var_ethosu_BI_pipeline(
diff --git a/backends/arm/test/ops/test_view.py b/backends/arm/test/ops/test_view.py
index 1603a2a37d1..f90ae402067 100644
--- a/backends/arm/test/ops/test_view.py
+++ b/backends/arm/test/ops/test_view.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -129,15 +129,10 @@ def test_view_tosa_MI(self, test_tensor: torch.Tensor, new_shape):
     def test_view_tosa_BI(self, test_tensor: torch.Tensor, new_shape):
         self._test_view_tosa_BI_pipeline(self.View(), (test_tensor, new_shape))
 
-    @parameterized.expand(View.no_transpose_tests)
+    @parameterized.expand(View.needs_transpose_tests + View.no_transpose_tests)
     def test_view_u55_BI(self, test_tensor: torch.Tensor, new_shape):
         self._test_view_u55_BI_pipeline(self.View(), (test_tensor, new_shape))
 
-    @parameterized.expand(View.needs_transpose_tests)
-    @unittest.expectedFailure
-    def test_view_transpose_u55_BI(self, test_tensor: torch.Tensor, new_shape):
-        self._test_view_u55_BI_pipeline(self.View(), (test_tensor, new_shape))
-
     @parameterized.expand(View.needs_transpose_tests + View.no_transpose_tests)
     def test_view_u85_BI(self, test_tensor: torch.Tensor, new_shape):
         self._test_view_u85_BI_pipeline(self.View(), (test_tensor, new_shape))
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
index b3f5b4f05b2..2b65c306be7 100644
--- a/backends/arm/test/tester/arm_tester.py
+++ b/backends/arm/test/tester/arm_tester.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -15,7 +15,7 @@
 
 import torch.fx
 
-from executorch.backends.arm.arm_backend import get_intermediate_path, is_permute_memory
+from executorch.backends.arm.arm_backend import get_intermediate_path
 from executorch.backends.arm.arm_partitioner import ArmPartitioner
 from executorch.backends.arm.quantizer.arm_quantizer import (
     ArmQuantizer,
@@ -329,7 +329,6 @@ def run_method_and_compare_outputs(
         logger.info(
             f"Comparing Stage '{self.stage_name(test_stage)}' with Stage '{self.stage_name(reference_stage)}'"
         )
-        is_nhwc = is_permute_memory(self.compile_spec)
 
         # Loop inputs and compare reference stage with the compared stage.
         for run_iteration in range(num_runs):
@@ -344,10 +343,7 @@ def run_method_and_compare_outputs(
                 if isinstance(arg, tuple) and isinstance(arg[0], torch.Tensor):
                     test_input.extend([tensor.clone() for tensor in arg])
 
-            if (
-                is_nhwc
-                and test_stage == self.stages[self.stage_name(tester.ToExecutorch)]
-            ):
+            if test_stage == self.stages[self.stage_name(tester.ToExecutorch)]:
                 test_input = self.transpose_data_format(test_input, "NHWC")
 
             input_shapes = [
@@ -359,10 +355,7 @@ def run_method_and_compare_outputs(
 
             reference_output = reference_stage.run_artifact(reference_input)
             test_output = test_stage.run_artifact(test_input)
-            if (
-                is_nhwc
-                and test_stage == self.stages[self.stage_name(tester.ToExecutorch)]
-            ):
+            if test_stage == self.stages[self.stage_name(tester.ToExecutorch)]:
                 test_output = self.transpose_data_format(test_output, "NCHW")
 
             self._compare_outputs(
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
index 2de1e713c9f..1208d79b061 100644
--- a/examples/arm/aot_arm_compiler.py
+++ b/examples/arm/aot_arm_compiler.py
@@ -1,6 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
-# Copyright 2023-2024 Arm Limited and/or its affiliates.
+# Copyright 2023-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -263,11 +263,7 @@ def get_compile_spec(
 ) -> ArmCompileSpecBuilder:
     spec_builder = None
     if target == "TOSA":
-        spec_builder = (
-            ArmCompileSpecBuilder()
-            .tosa_compile_spec("TOSA-0.80+BI")
-            .set_permute_memory_format(True)
-        )
+        spec_builder = ArmCompileSpecBuilder().tosa_compile_spec("TOSA-0.80+BI")
     elif "ethos-u55" in target:
         spec_builder = (
             ArmCompileSpecBuilder()
@@ -277,7 +273,6 @@ def get_compile_spec(
                 memory_mode=memory_mode,
                 extra_flags="--debug-force-regor --output-format=raw --verbose-operators --verbose-cycle-estimate",
             )
-            .set_permute_memory_format(True)
             .set_quantize_io(True)
             .set_input_order(reorder_inputs)
         )
@@ -290,7 +285,6 @@ def get_compile_spec(
                 memory_mode=memory_mode,
                 extra_flags="--output-format=raw --verbose-operators --verbose-cycle-estimate",
             )
-            .set_permute_memory_format(True)
             .set_quantize_io(True)
             .set_input_order(reorder_inputs)
         )
diff --git a/examples/arm/setup.sh b/examples/arm/setup.sh
index bf922360fde..5498bd78974 100755
--- a/examples/arm/setup.sh
+++ b/examples/arm/setup.sh
@@ -2,7 +2,7 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
-# Copyright 2023-2024 Arm Limited and/or its affiliates.
+# Copyright 2023-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -91,8 +91,8 @@ tosa_reference_model_url="https://review.mlplatform.org/tosa/reference_model"
 tosa_reference_model_rev="v0.80.1"
 
 # vela
-vela_repo_url="https://review.mlplatform.org/ml/ethos-u/ethos-u-vela"
-vela_rev="5427dc7e9c1a4c7d554163290faeea75f168772d"
+vela_repo_url="https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela"
+vela_rev="fc970e3da72e5f6930b840b357684126602b3126"
 
 ########
 ### Mandatory user args