diff --git a/backends/arm/README.md b/backends/arm/README.md
index 375259c62ab..6f4642f8d44 100644
--- a/backends/arm/README.md
+++ b/backends/arm/README.md
@@ -9,7 +9,7 @@ The expected flow is:
  * torch.nn.module -> TOSA -> command_stream for fully AoT flows e.g. embedded.
  * torch.nn.module -> TOSA for flows supporting a JiT compilation step.
 
-Current backend support is being developed for TOSA to Ethos(TM)-U55/65 via the
-ethos-u-vela compilation stack. which follows the fully AoT flow.
+Current backend support is being developed for TOSA to Ethos(TM)-U55/65/85 via the
+ethos-u-vela compilation stack, which follows the fully AoT flow.
 
 ## Layout
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
index 27fd36ca0e1..db8bad166cc 100644
--- a/backends/arm/arm_backend.py
+++ b/backends/arm/arm_backend.py
@@ -54,7 +54,7 @@ def ethosu_compile_spec(
         memory_mode: Optional[str] = None,
         extra_flags: Optional[str] = None,
         config_ini: Optional[str] = "Arm/vela.ini",
-    ):
+    ) -> "ArmCompileSpecBuilder":
         """
         Generate compile spec for Ethos-U NPU
 
@@ -84,7 +84,7 @@ def ethosu_compile_spec(
 
         return self
 
-    def tosa_compile_spec(self):
+    def tosa_compile_spec(self) -> "ArmCompileSpecBuilder":
         """
         Generate compile spec for TOSA flatbuffer output
         """
@@ -94,14 +94,18 @@ def tosa_compile_spec(self):
         self.output_format = "tosa"
         return self
 
-    def dump_intermediate_artifacts_to(self, output_path: str):
+    def dump_intermediate_artifacts_to(
+        self, output_path: str
+    ) -> "ArmCompileSpecBuilder":
         """
         Sets a path for dumping intermediate results during such as tosa and pte.
         """
         self.path_for_intermediates = output_path
         return self
 
-    def set_permute_memory_format(self, set_nhwc_permutation: bool = True):
+    def set_permute_memory_format(
+        self, set_nhwc_permutation: bool = True
+    ) -> "ArmCompileSpecBuilder":
         """
         Permute to channel last in compiler and runtime. Compilation and
         runtime will convert rank 4 inputs to channel last for each sub-graph.
@@ -109,7 +113,7 @@ def set_permute_memory_format(self, set_nhwc_permutation: bool = True):
         self.permute_nhwc = set_nhwc_permutation
         return self
 
-    def set_quantize_io(self, quantize_io: bool = False):
+    def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder":
         """
         Quantization of inputs and dequantization of outputs for cases where
         whole graph is quantized and method signature is not of quantized type.
@@ -117,7 +121,7 @@ def set_quantize_io(self, quantize_io: bool = False):
         self.quantize_io = quantize_io
         return self
 
-    def build(self):
+    def build(self) -> List[CompileSpec]:
         """
         Generate a list of compile spec objects from the builder
         """
diff --git a/backends/arm/arm_vela.py b/backends/arm/arm_vela.py
index 53533947c49..614ecdcdd9e 100644
--- a/backends/arm/arm_vela.py
+++ b/backends/arm/arm_vela.py
@@ -48,7 +48,12 @@ def vela_compile(tosa_graph, args: List[str]):
         args.append(tosa_path)
         vela.main(" ".join(args).split(" "))
 
-        np_path = os.path.join(output_dir, "out_sg0_vela.npz")
+        # Ethos-U85 and --debug-force-regor builds emit "out_vela.npz" instead.
+        plain_name = any(
+            "ethos-u85" in arg or "debug-force-regor" in arg for arg in args
+        )
+        np_name = "out_vela.npz" if plain_name else "out_sg0_vela.npz"
+        np_path = os.path.join(output_dir, np_name)
         blocks = b""
 
         with np.load(np_path, allow_pickle=False) as data:
diff --git a/backends/arm/runtime/ArmBackendEthosU.cpp b/backends/arm/runtime/ArmBackendEthosU.cpp
index 26ffb0b9700..6d9ab6b0091 100644
--- a/backends/arm/runtime/ArmBackendEthosU.cpp
+++ b/backends/arm/runtime/ArmBackendEthosU.cpp
@@ -281,18 +281,27 @@ class ArmBackend final : public ::executorch::runtime::BackendInterface {
       }
     }
     if (!permuted_shape) {
-      // Error check matching shapes in the general case
+      // Check the number of elements in each tensor match
+      int tensor_count = 1;
+      int io_count = 1;
+
       for (int i = 0; i < tensor.dim(); i++) {
-        if (tensor.size(i) != io->shape[i]) {
-          ET_LOG(Error, "Tensor input/output %d mismatched shape", index);
-          ET_LOG(
-              Error,
-              "dimension %d mismatch, %zd != %d",
-              index,
-              tensor.size(i),
-              io->shape[i]);
-          return Error::InvalidProgram;
-        }
+        tensor_count = tensor_count * tensor.size(i);
+      }
+
+      // The VelaIO type has a shape of fixed size 4
+      for (int i = 0; i < 4; i++) {
+        io_count = io_count * io->shape[i];
+      }
+
+      if (tensor_count != io_count) {
+        ET_LOG(Error, "Tensor input/output %d element count mismatch", index);
+        ET_LOG(
+            Error,
+            "Program expects %d elements but got %d",
+            io_count,
+            tensor_count);
+        return Error::InvalidProgram;
       }
     }
     *is_permuted = permuted_shape;
diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py
index f85fd1f2dac..0d50f1882da 100644
--- a/backends/arm/test/common.py
+++ b/backends/arm/test/common.py
@@ -14,6 +14,7 @@
 import torch
 
 from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
+from executorch.exir.backend.compile_spec_schema import CompileSpec
 
 _enabled_options: list[str] = []
 
@@ -85,7 +86,9 @@ def is_option_enabled(option: str, fail_if_not_enabled: bool = False) -> bool:
             return False
 
 
-def get_tosa_compile_spec(permute_memory_to_nhwc=True, custom_path=None):
+def get_tosa_compile_spec(
+    permute_memory_to_nhwc=True, custom_path=None
+) -> list[CompileSpec]:
     """
     Default compile spec for TOSA tests.
     """
@@ -112,8 +115,8 @@ def get_tosa_compile_spec_unbuilt(
 
 
 def get_u55_compile_spec(
-    permute_memory_to_nhwc=False, quantize_io=False, custom_path=None
-):
+    permute_memory_to_nhwc=True, quantize_io=False, custom_path=None
+) -> list[CompileSpec]:
     """
     Default compile spec for Ethos-U55 tests.
     """
@@ -122,10 +125,21 @@ def get_u55_compile_spec(
     ).build()
 
 
+def get_u85_compile_spec(
+    permute_memory_to_nhwc=True, quantize_io=False, custom_path=None
+) -> list[CompileSpec]:
+    """
+    Default compile spec for Ethos-U85 tests.
+    """
+    return get_u85_compile_spec_unbuilt(
+        permute_memory_to_nhwc, quantize_io=quantize_io, custom_path=custom_path
+    ).build()
+
+
 def get_u55_compile_spec_unbuilt(
-    permute_memory_to_nhwc=False, quantize_io=False, custom_path=None
+    permute_memory_to_nhwc=True, quantize_io=False, custom_path=None
 ) -> ArmCompileSpecBuilder:
-    """Get the ArmCompileSpecBuilder for the default TOSA tests, to modify
+    """Get the ArmCompileSpecBuilder for the Ethos-U55 tests, to modify
     the compile spec before calling .build() to finalize it.
     """
     artifact_path = custom_path or tempfile.mkdtemp(prefix="arm_u55_")
@@ -137,7 +151,29 @@ def get_u55_compile_spec_unbuilt(
             "ethos-u55-128",
             system_config="Ethos_U55_High_End_Embedded",
             memory_mode="Shared_Sram",
-            extra_flags=None,
+            extra_flags="--debug-force-regor --output-format=raw",
+        )
+        .set_quantize_io(is_option_enabled("quantize_io") or quantize_io)
+        .set_permute_memory_format(permute_memory_to_nhwc)
+        .dump_intermediate_artifacts_to(artifact_path)
+    )
+    return compile_spec
+
+
+def get_u85_compile_spec_unbuilt(
+    permute_memory_to_nhwc=True, quantize_io=False, custom_path=None
+) -> ArmCompileSpecBuilder:
+    """Get the ArmCompileSpecBuilder for the Ethos-U85 tests, to modify
+    the compile spec before calling .build() to finalize it.
+    """
+    artifact_path = custom_path or tempfile.mkdtemp(prefix="arm_u85_")
+    compile_spec = (
+        ArmCompileSpecBuilder()
+        .ethosu_compile_spec(
+            "ethos-u85-128",
+            system_config="Ethos_U85_SYS_DRAM_Mid",
+            memory_mode="Shared_Sram",
+            extra_flags="--output-format=raw",
          )
          .set_quantize_io(is_option_enabled("quantize_io") or quantize_io)
          .set_permute_memory_format(permute_memory_to_nhwc)
diff --git a/backends/arm/test/ops/test_cat.py b/backends/arm/test/ops/test_cat.py
index f677aa5590c..a40ae43b673 100644
--- a/backends/arm/test/ops/test_cat.py
+++ b/backends/arm/test/ops/test_cat.py
@@ -125,7 +125,9 @@ def test_cat_tosa_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
         test_data = (operands, dim)
         self._test_cat_tosa_BI_pipeline(self.Cat(), test_data)
 
+    # TODO: Remove @unittest.expectedFailure when this issue is fixed in Regor
     @parameterized.expand(Cat.test_parameters)
+    @unittest.expectedFailure
     def test_cat_u55_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
         test_data = (operands, dim)
         self._test_cat_u55_BI_pipeline(self.Cat(), test_data)
diff --git a/backends/arm/test/ops/test_exp.py b/backends/arm/test/ops/test_exp.py
index 79020ade25c..4f4935d482c 100644
--- a/backends/arm/test/ops/test_exp.py
+++ b/backends/arm/test/ops/test_exp.py
@@ -103,8 +103,6 @@ def test_exp_tosa_MI(
     def test_exp_tosa_BI(self, test_name: str, test_data: torch.Tensor):
         self._test_exp_tosa_BI_pipeline(self.Exp(), (test_data,))
 
-    # Fails due to Vela diff from Tosa spec, expected to work with Regor.
     @parameterized.expand(test_data_suite)
-    @unittest.expectedFailure
     def test_exp_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor):
         self._test_exp_tosa_u55_BI_pipeline(self.Exp(), (test_data,))
diff --git a/backends/arm/test/ops/test_log.py b/backends/arm/test/ops/test_log.py
index 80bc17c987f..90066b3a63b 100644
--- a/backends/arm/test/ops/test_log.py
+++ b/backends/arm/test/ops/test_log.py
@@ -103,8 +103,6 @@ def test_log_tosa_MI(
     def test_log_tosa_BI(self, test_name: str, test_data: torch.Tensor):
         self._test_log_tosa_BI_pipeline(self.Log(), (test_data,))
 
-    # Fails due to Vela diff from Tosa spec, logected to work with Regor.
     @parameterized.expand(test_data_suite)
-    @unittest.expectedFailure
     def test_log_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor):
         self._test_log_tosa_u55_BI_pipeline(self.Log(), (test_data,))
diff --git a/backends/arm/test/ops/test_mul.py b/backends/arm/test/ops/test_mul.py
index dee8b62f1b2..2aac3f22f13 100644
--- a/backends/arm/test/ops/test_mul.py
+++ b/backends/arm/test/ops/test_mul.py
@@ -141,9 +141,7 @@ def test_mul_tosa_BI(
         test_data = (input_, other_)
         self._test_mul_tosa_BI_pipeline(self.Mul(), test_data)
 
-    # Expected to fail since RESCALE cannot be fused with MUL in Vela.
     @parameterized.expand(test_data_sute)
-    @unittest.expectedFailure
     def test_mul_u55_BI(
         self,
         test_name: str,
diff --git a/backends/arm/test/ops/test_sigmoid.py b/backends/arm/test/ops/test_sigmoid.py
index 7a0435689f4..369019774fa 100644
--- a/backends/arm/test/ops/test_sigmoid.py
+++ b/backends/arm/test/ops/test_sigmoid.py
@@ -145,8 +145,6 @@ def test_sigmoid_add_sigmoid_tosa_BI(self):
             self.SigmoidAddSigmoid(), (test_data_suite[4][1], test_data_suite[3][1])
         )
 
-    # Fails due to Vela diff from Tosa spec, expected to work with Regor.
     @parameterized.expand(test_data_suite)
-    @unittest.expectedFailure
     def test_sigmoid_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor):
         self._test_sigmoid_tosa_u55_BI_pipeline(self.Sigmoid(), (test_data,))
diff --git a/backends/arm/test/ops/test_sub.py b/backends/arm/test/ops/test_sub.py
index 2ae7c3ab36f..0a9f159f365 100644
--- a/backends/arm/test/ops/test_sub.py
+++ b/backends/arm/test/ops/test_sub.py
@@ -104,9 +104,7 @@ def test_sub_tosa_BI(self, test_data: torch.Tensor):
         test_data = (test_data,)
         self._test_sub_tosa_BI_pipeline(self.Sub(), test_data)
 
-    # Expected to fail since RESCALE cannot be fused with SUB in Vela.
     @parameterized.expand(Sub.test_parameters)
-    @unittest.expectedFailure
     def test_sub_u55_BI(self, test_data: torch.Tensor):
         test_data = (test_data,)
         self._test_sub_u55_BI_pipeline(self.Sub(), test_data)
diff --git a/examples/arm/setup.sh b/examples/arm/setup.sh
index 9cef98e6227..8c39a3a8668 100755
--- a/examples/arm/setup.sh
+++ b/examples/arm/setup.sh
@@ -216,7 +216,7 @@ function setup_vela() {
     if [[ ! -e ethos-u-vela ]]; then
         git clone https://review.mlplatform.org/ml/ethos-u/ethos-u-vela
         repo_dir="${root_dir}/ethos-u-vela"
-        base_rev=7706c1281166e7611f4300ed26338087152a33c9
+        base_rev=d362f5443f67b1e6213a9d8f124edff758efac96
         patch_repo
     fi
     cd "${root_dir}/ethos-u-vela"