diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py index f1b97625728..7ebf89e3927 100644 --- a/backends/arm/test/common.py +++ b/backends/arm/test/common.py @@ -145,14 +145,3 @@ def get_u85_compile_spec_unbuilt( .dump_intermediate_artifacts_to(artifact_path) ) return compile_spec - - -def get_target_board(compile_spec: list[CompileSpec]) -> str | None: - for spec in compile_spec: - if spec.key == "compile_flags": - flags = spec.value.decode() - if "u55" in flags: - return "corstone-300" - elif "u85" in flags: - return "corstone-320" - return None diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py index d29695dedf3..509690dd2fb 100644 --- a/backends/arm/test/models/test_mobilenet_v2_arm.py +++ b/backends/arm/test/models/test_mobilenet_v2_arm.py @@ -98,7 +98,7 @@ def test_mv2_u55_BI(self): ) if conftest.is_option_enabled("corstone_fvp"): tester.run_method_and_compare_outputs( - atol=1.0, qtol=1, inputs=self.model_inputs, target_board="corstone-300" + atol=1.0, qtol=1, inputs=self.model_inputs ) @pytest.mark.slow @@ -118,5 +118,5 @@ def test_mv2_u85_BI(self): ) if conftest.is_option_enabled("corstone_fvp"): tester.run_method_and_compare_outputs( - atol=1.0, qtol=1, inputs=self.model_inputs, target_board="corstone-320" + atol=1.0, qtol=1, inputs=self.model_inputs ) diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py index e3502baf2c7..71d9feca8bf 100644 --- a/backends/arm/test/ops/test_max_pool.py +++ b/backends/arm/test/ops/test_max_pool.py @@ -173,9 +173,7 @@ def test_maxpool2d_tosa_u55_BI( (test_data,), ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=(test_data,), target_board="corstone-300" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) @parameterized.expand(test_data_suite) @pytest.mark.corstone_fvp @@ -191,9 +189,7 @@ def test_maxpool2d_tosa_u85_BI( (test_data,), ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=(test_data,), target_board="corstone-320" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) @parameterized.expand(test_data_suite_mult_batches) def test_maxpool2d_tosa_MI_mult_batches( @@ -232,9 +228,7 @@ def test_maxpool2d_tosa_u55_BI_mult_batches( (test_data,), ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=(test_data,), target_board="corstone-300" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) @parameterized.expand(test_data_suite_mult_batches) @pytest.mark.corstone_fvp @@ -251,6 +245,4 @@ def test_maxpool2d_tosa_u85_BI_mult_batches( (test_data,), ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=(test_data,), target_board="corstone-320" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) diff --git a/backends/arm/test/ops/test_maximum.py b/backends/arm/test/ops/test_maximum.py index c6280fafbbd..a365642b450 100644 --- a/backends/arm/test/ops/test_maximum.py +++ b/backends/arm/test/ops/test_maximum.py @@ -121,9 +121,7 @@ def test_maximum_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): self.Maximum(), common.get_u55_compile_spec(), test_data ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=test_data, target_board="corstone-300" - ) + 
tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) @parameterized.expand(Maximum.test_parameters) def test_maximum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): @@ -132,6 +130,4 @@ def test_maximum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): self.Maximum(), common.get_u85_compile_spec(), test_data ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=test_data, target_board="corstone-320" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) diff --git a/backends/arm/test/ops/test_minimum.py b/backends/arm/test/ops/test_minimum.py index bed0484df7a..c1a526fb5f9 100644 --- a/backends/arm/test/ops/test_minimum.py +++ b/backends/arm/test/ops/test_minimum.py @@ -121,9 +121,7 @@ def test_minimum_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): self.Minimum(), common.get_u55_compile_spec(), test_data ) if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, inputs=test_data, target_board="corstone-300" - ) + tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) @parameterized.expand(Minimum.test_parameters) def test_minimum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): @@ -133,5 +131,6 @@ def test_minimum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): ) if conftest.is_option_enabled("corstone_fvp"): tester.run_method_and_compare_outputs( - qtol=1, inputs=test_data, target_board="corstone-320" + qtol=1, + inputs=test_data, ) diff --git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py index 3851e41b73e..ef779971a8b 100644 --- a/backends/arm/test/runner_utils.py +++ b/backends/arm/test/runner_utils.py @@ -10,8 +10,8 @@ import shutil import subprocess import tempfile - from pathlib import Path + from typing import cast, Dict, List, Literal, Optional, Tuple import numpy as np @@ -21,13 +21,14 @@ from executorch.backends.arm.test.conftest import is_option_enabled from executorch.backends.arm.tosa_specification import TosaSpecification +from executorch.exir import ExecutorchProgramManager, ExportedProgram +from executorch.exir.backend.compile_spec_schema import CompileSpec from executorch.exir.lowered_backend_module import LoweredBackendModule - from packaging.version import Version -from torch.export import ExportedProgram from torch.fx.node import Node from torch.overrides import TorchFunctionMode +from torch.testing._internal.common_utils import torch_to_numpy_dtype_dict from tosa import TosaGraph logger = logging.getLogger(__name__) @@ -55,7 +56,7 @@ def __init__( self.dtype = dtype -def _get_input_names(program: ExportedProgram) -> list[str]: +def get_input_names(program: ExportedProgram) -> list[str]: """ Get a list[str] with the names of the inputs to this model. @@ -76,7 +77,7 @@ def _get_input_names(program: ExportedProgram) -> list[str]: return input_names -def _get_input_quantization_params( +def get_input_quantization_params( program: ExportedProgram, ) -> list[QuantizationParams]: """ @@ -85,12 +86,10 @@ def _get_input_quantization_params( program (ExportedProgram): The program to get input quantization parameters from. Returns: list[QuantizationParams]: The found quantization parameters. - Raises: - RuntimeError if no quantization parameters are found. 
""" quant_params = [] - input_names = _get_input_names(program) + input_names = get_input_names(program) num_inputs = len(input_names) for node in program.graph.nodes: if ( @@ -115,7 +114,7 @@ def _get_input_quantization_params( return quant_params -def _get_output_nodes(program: ExportedProgram) -> list[Node]: +def get_output_nodes(program: ExportedProgram) -> list[Node]: """ Get output node to this model. @@ -135,33 +134,32 @@ def _get_output_nodes(program: ExportedProgram) -> list[Node]: return output_nodes -def _get_output_quantization_params( +def get_output_quantization_params( output_nodes: list[Node], -) -> List[QuantizationParams]: +) -> dict[Node, QuantizationParams | None]: """ Get output QuantizationParams from a program. Args: output_nodes (list(Node)): A list of output nodes to get output quantization parameters from. Returns: - QuantizationParams: The found quantization parameters. + dictionary mapping the output nodes to the found quantization parameters. + If no quantization parameters were found, the entry is None. Raises: RuntimeError if no output quantization parameters are found. """ - quant_params = [] + quant_params = {} for node in output_nodes: if node.target == torch.ops.quantized_decomposed.dequantize_per_tensor.default: - quant_params.append( - QuantizationParams( - node_name=node.args[0].name, - scale=node.args[1], - zp=node.args[2], - qmin=node.args[3], - qmax=node.args[4], - dtype=node.args[5], - ) + quant_params[node] = QuantizationParams( + node_name=node.args[0].name, + scale=node.args[1], + zp=node.args[2], + qmin=node.args[3], + qmax=node.args[4], + dtype=node.args[5], ) - if len(quant_params) == 0: - raise RuntimeError("No Quantization parameters not found in exported model.") + else: + quant_params[node] = None return quant_params @@ -177,7 +175,7 @@ def _tosa_dispatch(self, lowered_backend_module: LoweredBackendModule, inputs): ) tosa_version = get_tosa_version(compile_specs) - return run_tosa_graph_static(tosa_buffer, tosa_version, inputs) + return run_tosa_graph(tosa_buffer, tosa_version, inputs) def __torch_function__(self, func, types, args=..., kwargs=None): if isinstance(func, torch._higher_order_ops.executorch_call_delegate.ExecutorchCallDelegate): # type: ignore @@ -193,105 +191,65 @@ def __torch_function__(self, func, types, args=..., kwargs=None): return func(*args, **kwargs) -""" -A class to store parameters needed for running programs, either in tosa or .pte format. -""" - - -class RunnerUtil: - def __init__( - self, - intermediate_path: str, - tosa_ref_model_path: Optional[str] = None, - ): - self.intermediate_path = intermediate_path - self.tosa_ref_model_path = tosa_ref_model_path or "tosa_reference_model" - assert self.intermediate_path is None or os.path.exists( - self.intermediate_path - ), f"TOSA artifact path don't exist! 
Path: {self.intermediate_path}"
-
-        self.is_quantized: bool = False
-        self.input_names: list[str] = None
-        self.output_name: str = None
-        self.qp_input: list[QuantizationParams] = None
-        self.qp_output: list[QuantizationParams] = None
-        self.timeout = 480
-        self.target_board: str = None
-
-        self._has_init_run = False
-
-    def init_run(
-        self,
-        exported_program: ExportedProgram,
-        edge_program: ExportedProgram,
-        is_quantized: bool,
-        target_board: str,
-    ):
-
-        self.input_names = _get_input_names(edge_program)
-        self.output_nodes = _get_output_nodes(exported_program)
-
-        self.is_quantized = is_quantized
-        self.target_board = target_board
-
-        if is_quantized:
-            self.qp_input = _get_input_quantization_params(exported_program)
-            self.qp_output = _get_output_quantization_params(self.output_nodes)
-        else:
-            self.qp_input = [None] * len(self.input_names)
-            self.qp_output = [None] * len(self.output_nodes)
-
-        self._has_init_run = True
-
-    def set_timeout(self, timeout: int):
-        self.timeout = timeout
-
-    def run_corstone(
-        self,
-        inputs: Tuple[torch.Tensor],
-    ) -> list[torch.Tensor]:
-
-        assert (
-            self._has_init_run
-        ), "RunnerUtil needs to be initialized using init_run() before running Corstone FVP."
-        if self.target_board not in ["corstone-300", "corstone-320"]:
-            raise RuntimeError(f"Unknown target board: {self.target_board}")
-
-        pte_path = os.path.join(self.intermediate_path, "program.pte")
-        assert os.path.exists(pte_path), f"Pte path '{pte_path}' not found."
-
-        for input_name, quant_param, data in zip(
-            self.input_names, self.qp_input, inputs
-        ):
-            save_bytes(self.intermediate_path, data, False, input_name, quant_param)
-
-        out_path = os.path.join(self.intermediate_path, "out")
-
-        input_paths = []
-        for name in self.input_names:
-            input_paths.append(
-                os.path.join(self.intermediate_path, f"{name}.bin"),
-            )
-        elf_path = os.path.join(
-            "cmake-out",
-            f"arm_semihosting_executor_runner_{self.target_board}",
-            "arm_executor_runner",
-        )
-        assert os.path.exists(
-            elf_path
-        ), f"Did not find build arm_executor_runner in path {elf_path}, run setup_testing.sh?"
-
-        cmd_line = f"executor_runner -m {pte_path} -o {out_path}"
-
-        for input_path in input_paths:
-            cmd_line += f" -i {input_path}"
-
-        ethos_u_extra_args = ""
-        if is_option_enabled("fast_fvp"):
-            ethos_u_extra_args = ethos_u_extra_args + "--fast"
+def run_corstone(
+    executorch_program_manager: ExecutorchProgramManager,
+    inputs: Tuple[torch.Tensor],
+    intermediate_path: str | Path,
+    target_board: Literal["corstone-300", "corstone-320"],
+    elf_path: str | Path,
+    timeout: int = 120,  # s
+) -> tuple[torch.Tensor, ...]:
+    """Executes an inference of the executorch program on the FVP.
+    Returns a tuple of tensors with the output.
+    Args:
+        `executorch_program_manager`: the executorch program to run.
+            The output of an EdgeProgramManager.to_executorch() call.
+        `inputs`: A list of tensors with the inputs of the inference.
+        `intermediate_path`: A directory where the .pte and inputs are saved to file.
+            The output tensors are saved in `intermediate_path`/out.
+        `target_board`: Which FVP to run: corstone-300 or corstone-320.
+        `elf_path`: The path to the runtime elf. Needs to have semihosting enabled
+            and match the target_board.
+        `timeout`: The timeout until the FVP terminates the elf, in seconds.
+    Limitations:
+        Relies on the output tensors from the exported program
+        to figure out the shape and dtype of the buffer that was
+        output from the FVP.
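+    Example (illustrative sketch; `epm` is assumed to be the result of an
+    EdgeProgramManager.to_executorch() call, and the elf to be the runner
+    built by setup_testing.sh):
+        outputs = run_corstone(
+            epm,
+            inputs=(torch.randn(1, 3, 224, 224),),
+            intermediate_path="arm_test/intermediates",
+            target_board="corstone-300",
+            elf_path="cmake-out/arm_semihosting_executor_runner_corstone-300/arm_executor_runner",
+        )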
+ """ - command_args = { - "corstone-300": [ + exported_program = executorch_program_manager.exported_program() + intermediate_path = Path(intermediate_path) + intermediate_path.mkdir(exist_ok=True) + elf_path = Path(elf_path) + if not elf_path.exists(): + raise FileNotFoundError(f"Did not find elf file {elf_path}") + + # Save pte to file + pte_path = os.path.join(intermediate_path, "program.pte") + with open(pte_path, "wb") as f: + f.write(executorch_program_manager.buffer) + + # Save inputs to file + input_names = get_input_names(exported_program) + input_paths = [] + for input_name, input_ in zip(input_names, inputs): + input_path = save_bytes(intermediate_path, input_, input_name) + input_paths.append(input_path) + + out_path = os.path.join(intermediate_path, "out") + + cmd_line = f"executor_runner -m {pte_path} -o {out_path}" + for input_path in input_paths: + cmd_line += f" -i {input_path}" + + ethos_u_extra_args = "" + if is_option_enabled("fast_fvp"): + ethos_u_extra_args = ethos_u_extra_args + "--fast" + + match target_board: + case "corstone-300": + command_args = [ "FVP_Corstone_SSE-300_Ethos-U55", "-C", "ethosu.num_macs=128", @@ -314,9 +272,10 @@ def run_corstone( "-a", elf_path, "--timelimit", - f"{self.timeout}", - ], - "corstone-320": [ + f"{timeout}", + ] + case "corstone-320": + command_args = [ "FVP_Corstone_SSE-320", "-C", "mps4_board.subsystem.ethosu.num_macs=128", @@ -345,212 +304,44 @@ def run_corstone( "-a", elf_path, "--timelimit", - f"{self.timeout}", - ], - } - - result = _run_cmd(command_args[self.target_board], check=False) - if result.returncode != 0: - raise RuntimeError( - f"Failed to run {command_args[self.target_board]}\nOutput:\n{result.stdout.decode()}\nError: {result.stderr.decode()}" - ) - result_stdout = result.stdout.decode() + f"{timeout}", + ] + case _: + raise ValueError(f"Unknown target board {target_board}") - error_regex = r"(^[EF][: ].*$)|(^.*Hard fault.*$)|(^.*Assertion.*$)" + result = _run_cmd(command_args) - # Check for errors in the output - # regex to check for error or fault messages in stdout from FVP - if re.compile(error_regex, re.MULTILINE).search(result_stdout): - raise RuntimeError( - f"Corstone simulation failed:\ncmd: {command_args[self.target_board]}\n, log: \n {result_stdout}\n{result.stderr.decode()}" - ) - output_np = [] - for i, node in enumerate(self.output_nodes): - tosa_ref_output = np.fromfile( - os.path.join(self.intermediate_path, f"out-{i}.bin"), dtype=np.float32 - ) - output_shape = node.meta["val"].shape - output_np.append(torch.from_numpy(tosa_ref_output).reshape(output_shape)) - return tuple(output_np) - - def run_tosa_graph( - self, graph: TosaGraph, inputs: list[np.ndarray] | list[torch.Tensor] - ) -> torch.Tensor: - """Runs the TOSA reference model with inputs and returns the result.""" - data_np = [ - prep_data_for_save( - input, self.is_quantized, self.input_names[i], self.qp_input[i] - ) - for i, input in enumerate(inputs) - ] - # tosa_profile: 0 = Base Inference, 1 = Main Inference, 2 = Main Training. 
-        tosa_profile = 0 if self.is_quantized else 1
-        debug_mode = "ALL" if logger.level <= logging.DEBUG else None
-        outputs, status = tosa_reference_model.run(
-            graph,
-            data_np,
-            verbosity=_tosa_refmodel_loglevel(logger.level),
-            tosa_profile=tosa_profile,
-            initialize_variable_tensor_from_numpy=1,  # True
-            debug_mode=debug_mode,
+    # Regex to check for error or fault messages in stdout from FVP
+    result_stdout = result.stdout.decode()
+    error_regex = r"(^[EF][: ].*$)|(^.*Hard fault.*$)|(^.*Assertion.*$)"
+    if re.compile(error_regex, re.MULTILINE).search(result_stdout):
+        raise RuntimeError(
+            f"Corstone simulation failed:\ncmd: {' '.join(str(arg) for arg in command_args)}\nlog: \n {result_stdout}\n{result.stderr.decode()}"
         )
-        assert (
-            status == tosa_reference_model.GraphStatus.TOSA_VALID
-        ), "Non-valid TOSA given to reference model."
-
-        outputs_torch = []
-        for output in outputs:
-            output = torch.from_numpy(output)
-            if self.is_quantized:
-                # Need to dequant back to FP32 for comparison with torch output
-                quant_param = self.qp_output
-                assert (
-                    quant_param is not None
-                ), "There are no quantization parameters, check output parameters"
-                output = (output.to(torch.float32) - quant_param.zp) * quant_param.scale
-            outputs_torch.append(output)
-        return tuple(outputs_torch)
-
-    def run_tosa_ref_model(
-        self,
-        inputs: Tuple[torch.Tensor],
-    ) -> list[torch.Tensor]:
-        """
-        Run TOSA reference model using the tosa_reference_model program.
-
-        In order to do that we need:
-        1. desc.json, which points to files needed by tosa_reference_model.
-        2. output.tosa, which is the TOSA buffer that describes the model we're
-           trying to run.
-
-        These two files are created by arm_backend.py as part of partition stage
-
-        All these files are saved on disk in self.intermediate_path.
-
-        Args:
-            inputs (Tuple[torch.Tensor]): The input data to run the TOSA
-
-        Returns:
-            torch.Tensor: The output of the TOSA reference model, as a torch
-                tensor.
-
-        Here's a sample desc.json file:
-        {
-            "tosa_file": "output.tosa",
-            "ifm_name": [
-                "arg0_1"
-            ],
-            "ifm_file": [
-                "arg0_1.npy"
-            ],
-            "ofm_name": [
-                "quantized_decomposed_dequantize_per_tensor_default_1"
-            ],
-            "ofm_file": [
-                "ref-quantized_decomposed_dequantize_per_tensor_default_1.npy"
-            ],
-            "expected_return_code": 0,
-            "expected_failure": false
-        }
-
-        Todo:
-            * It would be nice to not rely on files on disk. Should be possible
-              as a next step. See:
-              https://review.mlplatform.org/plugins/gitiles/tosa/reference_model/#executable-usage
-        """
-
-        assert (
-            self._has_init_run
-        ), "RunnerUtil needs to be initialized using init_run() before running tosa reference."
-
-        all_desc_file_paths = [
-            str(path) for path in Path(self.intermediate_path).glob("desc*.json")
-        ]
-        assert (
-            all_desc_file_paths
-        ), f"No TOSA description file found in '{self.intermediate_path}'."
-        if len(all_desc_file_paths) != 1:
-            raise NotImplementedError(
-                "Graphs with more than one partition are currently not supported."
-            )
-
-        desc_file_path = all_desc_file_paths[0]
-        assert os.path.exists(
-            desc_file_path
-        ), f"desc_file_path: {desc_file_path} does not exist"
-
-        # Save the input data to disk as a .npy file, since that's what the TOSA
-        # reference model expects. 
Name of the file must match the name in - # desc.json, which is the tensor name from the graph + .npy - for input_name, quant_param, data in zip( - self.input_names, self.qp_input, inputs, strict=True - ): - save_npy( - self.intermediate_path, data, self.is_quantized, input_name, quant_param - ) + output_nodes = get_output_nodes(exported_program) + output_np = [] + for i, node in enumerate(output_nodes): + output_shape = node.meta["val"].shape + output_dtype = node.meta["val"].dtype + tosa_ref_output = np.fromfile( + os.path.join(intermediate_path, f"out-{i}.bin"), + torch_to_numpy_dtype_dict[output_dtype], + ) - # Run the TOSA reference model via command line, this will produce a - # .npy file with the result (aka OFM). - assert ( - shutil.which(self.tosa_ref_model_path) is not None - ), f"tosa_reference_model tool not found, did you run examples/arm/setup.sh? Path: {self.tosa_ref_model_path}" - - cmd_ref_model = [ - self.tosa_ref_model_path, - "--test_desc", - desc_file_path, - "-l", - _tosa_refmodel_loglevel(logger.level), - ] - _run_cmd(cmd_ref_model) - - # Load desc.json, just to get the name of the output file above - with open(desc_file_path) as f: - desc_json = json.load(f) - - tosa_ref_outputs = [] - for ofm_file in desc_json["ofm_file"]: - ofm_file_npy = os.path.join(self.intermediate_path, ofm_file) - - # Load the output file (OFM) and return it as a numpy array - tosa_ref_output = np.load(ofm_file_npy) - - if self.is_quantized: - # Need to dequant back to FP32 for comparison with torch output - # Convert to int32 prior to dequantize the output - if tosa_ref_output.dtype == np.int8: - tosa_ref_output = tosa_ref_output.astype(np.int32) - quant_param = self.qp_output - if quant_param is not None: - # I.e. bool output is possible for quantized models - tosa_ref_output = ( - tosa_ref_output - quant_param.zp - ) * quant_param.scale - - if tosa_ref_output.dtype == np.double: - tosa_ref_output = tosa_ref_output.astype("float32") - elif tosa_ref_output.dtype == bool: - # retain the bool output though for boolean related comparisons - tosa_ref_output = tosa_ref_output.astype("bool") - - # tosa_output is a numpy array, convert to torch tensor for comparison - tosa_ref_outputs.append(torch.from_numpy(tosa_ref_output)) - - return tosa_ref_outputs + output_np.append(torch.from_numpy(tosa_ref_output).reshape(output_shape)) + return tuple(output_np) def prep_data_for_save( data: torch.Tensor, - is_quantized: bool, input_name: str, - quant_param: QuantizationParams, + quant_param: Optional[QuantizationParams] = None, ): data_np = np.array(data.detach(), order="C").astype( - f"{data.dtype}".replace("torch.", "") + torch_to_numpy_dtype_dict[data.dtype] ) - - if is_quantized: + if quant_param is not None: assert quant_param.node_name in input_name, ( f"The quantization params name '{quant_param.node_name}' does not " f"match the input tensor name '{input_name}'." @@ -569,22 +360,20 @@ def prep_data_for_save( def save_npy( path: str, data, - is_quantized: bool, input_name: str, - quant_param: QuantizationParams, + quant_param: Optional[QuantizationParams] = None, ) -> str: """Serializes and saves 'data' as a .npy file, possibly quantizing it before. Parameters: path: the directory where to save the data. data: the data to save. - is_quantized: whether to quantize the data before saving it. input_name: the name of the file, without file-ending. quant_param: the parameters to use for quantization. Returns: the full file path of the output. 
""" - data_np = prep_data_for_save(data, is_quantized, input_name, quant_param) + data_np = prep_data_for_save(data, input_name, quant_param) file_path = os.path.join(path, input_name + ".npy") np.save(file_path, data_np, allow_pickle=False) @@ -594,22 +383,20 @@ def save_npy( def save_bytes( path: str, data, - is_quantized: bool, input_name: str, - quant_param: QuantizationParams, + quant_param: Optional[QuantizationParams] = None, ) -> str: """Serializes and saves 'data' in byte format, possibly quantizing it before. Parameters: path: the directory where to save the data. data: the data to save. - is_quantized: whether to quantize the data before saving it. input_name: the name of the file, without file-ending. quant_param: the parameters to use for quantization. Returns: the full file path of the output. """ - data_np = prep_data_for_save(data, is_quantized, input_name, quant_param) + data_np = prep_data_for_save(data, input_name, quant_param) file_path = os.path.join(path, input_name + ".bin") with open(file_path, "w+b") as f: data_np_bytes = data_np.tobytes() @@ -705,7 +492,7 @@ def _tosa_refmodel_loglevel(loglevel: int) -> str: return loglevel_map[clamped_logging_level] -def run_tosa_graph_static( +def run_tosa_graph( graph: TosaGraph, tosa_version: TosaSpecification, inputs: list[torch.Tensor], @@ -740,11 +527,25 @@ def run_tosa_graph_static( def transpose_data_format(data: list[np.ndarray], to: Literal["NHWC", "NCHW"]): - if to == "NCHW": - dim_order = (0, 3, 1, 2) - if to == "NHWC": - dim_order = (0, 2, 3, 1) + match to: + case "NCHW": + dim_order = (0, 3, 1, 2) + case "NHWC": + dim_order = (0, 2, 3, 1) + case _: + raise NotImplementedError(f"Cant transpose to dim order {to}") for i in range(len(data)): if hasattr(data[i], "shape") and len(data[i].shape) == 4: # Copy is needed to force actual data conversion, not setting stride. 
             data[i] = np.transpose(data[i], dim_order).copy()
+
+
+def get_target_board(compile_spec: list[CompileSpec]) -> str | None:
+    for spec in compile_spec:
+        if spec.key == "compile_flags":
+            flags = spec.value.decode()
+            if "u55" in flags:
+                return "corstone-300"
+            elif "u85" in flags:
+                return "corstone-320"
+    return None
diff --git a/backends/arm/test/tester/analyze_output_utils.py b/backends/arm/test/tester/analyze_output_utils.py
index 477a96652fe..3436bfe618a 100644
--- a/backends/arm/test/tester/analyze_output_utils.py
+++ b/backends/arm/test/tester/analyze_output_utils.py
@@ -7,10 +7,11 @@
 import tempfile
 
 import torch
+from executorch.backends.arm.arm_backend import get_intermediate_path
 from executorch.backends.arm.test.runner_utils import (
-    _get_input_quantization_params,
-    _get_output_nodes,
-    _get_output_quantization_params,
+    get_input_quantization_params,
+    get_output_nodes,
+    get_output_quantization_params,
 )
 from executorch.backends.xnnpack.test.tester.tester import Export, Quantize
 
@@ -220,7 +221,7 @@ def dump_error_output(
     # Capture assertion error and print more info
     banner = "=" * 40 + "TOSA debug info" + "=" * 40
     logger.error(banner)
-    path_to_tosa_files = tester.runner_util.intermediate_path
+    path_to_tosa_files = get_intermediate_path(tester.compile_spec)
 
     if path_to_tosa_files is None:
         path_to_tosa_files = tempfile.mkdtemp(prefix="executorch_result_dump_")
@@ -228,9 +229,9 @@
     export_stage = tester.stages.get(tester.stage_name(Export), None)
     quantize_stage = tester.stages.get(tester.stage_name(Quantize), None)
     if export_stage is not None and quantize_stage is not None:
-        output_nodes = _get_output_nodes(export_stage.artifact)
-        qp_input = _get_input_quantization_params(export_stage.artifact)
-        qp_output = _get_output_quantization_params(output_nodes)
+        output_nodes = get_output_nodes(export_stage.artifact)
+        qp_input = get_input_quantization_params(export_stage.artifact)
+        qp_output = get_output_quantization_params(output_nodes)
         logger.error(f"Input QuantArgs: {qp_input}")
         logger.error(f"Output QuantArgs: {qp_output}")
 
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
index 5b2f9201fc5..2c11cedae11 100644
--- a/backends/arm/test/tester/arm_tester.py
+++ b/backends/arm/test/tester/arm_tester.py
@@ -5,6 +5,7 @@
 
 import logging
+import os
 
 from collections import Counter
 from pprint import pformat
 from typing import Iterable, List, Optional, Tuple, Union
@@ -22,10 +23,12 @@
     ArmQuantizer,
     get_symmetric_quantization_config,
 )
-from executorch.backends.arm.test.common import get_target_board
 from executorch.backends.arm.test.runner_utils import (
     dbg_tosa_fb_to_json,
-    RunnerUtil,
+    get_output_nodes,
+    get_output_quantization_params,
+    get_target_board,
+    run_corstone,
     TosaReferenceModelDispatch,
 )
 
@@ -46,6 +49,7 @@
 from tabulate import tabulate
 from torch.export.graph_signature import ExportGraphSignature, InputSpec, OutputSpec
 from torch.fx import Graph
+from torch.utils._pytree import tree_flatten
 
 logger = logging.getLogger(__name__)
 
@@ -109,18 +113,43 @@ def dump_artifact(self, path_to_dump: Optional[str]):
 
 
 class Serialize(tester.Serialize):
-    def __init__(self, runner_util: RunnerUtil, timeout: int = 1):
+    def __init__(self, compile_spec: list[CompileSpec], timeout: int):
         super().__init__()
-        self.runner = runner_util
-        self.runner.set_timeout(timeout)
+        self.timeout = timeout
+        self.executorch_program_manager: ExecutorchProgramManager | None = None
+        self.compile_spec = compile_spec
+
+    def run(self, artifact: 
ExecutorchProgramManager, inputs=None) -> None: + super().run(artifact, inputs) + # Keep the entire ExecutorchProgramManager for execution. + self.executorch_program_manager = artifact def run_artifact(self, inputs): - return self.runner.run_corstone(inputs) + if self.executorch_program_manager is None: + raise RuntimeError( + "Tried running artifact from Serialize stage without running the stage." + ) + inputs_flattened, _ = tree_flatten(inputs) + intermediate_path = get_intermediate_path(self.compile_spec) + target_board = get_target_board(self.compile_spec) + elf_path = os.path.join( + "cmake-out", + f"arm_semihosting_executor_runner_{target_board}", + "arm_executor_runner", + ) + if not os.path.exists(elf_path): + raise FileNotFoundError( + f"Did not find build arm_executor_runner in path {elf_path}, run setup_testing.sh?" + ) - def dump_artifact(self, path_to_dump: Optional[str]): - if not path_to_dump: - path_to_dump = self.path + "/program.pte" - super().dump_artifact(path_to_dump) + return run_corstone( + self.executorch_program_manager, + inputs_flattened, + intermediate_path, + target_board, + elf_path, + self.timeout, + ) class ToExecutorch(tester.ToExecutorch): @@ -156,8 +185,7 @@ def __init__( self, model: torch.nn.Module, example_inputs: Tuple[torch.Tensor], - compile_spec: List[CompileSpec] = None, - tosa_ref_model_path: str | None = None, + compile_spec: List[CompileSpec], ): """ Args: @@ -166,13 +194,6 @@ def __init__( compile_spec (List[CompileSpec]): The compile spec to use """ - # Initiate runner_util - intermediate_path = get_intermediate_path(compile_spec) - self.runner_util = RunnerUtil( - intermediate_path=intermediate_path, - tosa_ref_model_path=tosa_ref_model_path, - ) - self.compile_spec = compile_spec super().__init__(model, example_inputs) self.pipeline[self.stage_name(InitialModel)] = [ @@ -245,16 +266,12 @@ def serialize( self, serialize_stage: Optional[Serialize] = None, timeout: int = 480 ): if serialize_stage is None: - serialize_stage = Serialize(self.runner_util, timeout=timeout) + serialize_stage = Serialize(self.compile_spec, timeout) assert ( get_intermediate_path(self.compile_spec) is not None ), "Can't dump serialized file when compile specs do not contain an artifact path." - return ( - super() - .serialize(serialize_stage) - .dump_artifact(get_intermediate_path(self.compile_spec) + "/program.pte") - ) + return super().serialize(serialize_stage) def is_quantized(self) -> bool: return self.stages[self.stage_name(tester.Quantize)] is not None @@ -263,7 +280,6 @@ def run_method_and_compare_outputs( self, inputs: Optional[Tuple[torch.Tensor]] = None, stage: Optional[str] = None, - target_board: Optional[str] = None, num_runs=1, atol=1e-03, rtol=1e-03, @@ -287,9 +303,6 @@ def run_method_and_compare_outputs( edge_stage = self.stages[self.stage_name(tester.ToEdge)] if edge_stage is None: edge_stage = self.stages[self.stage_name(tester.ToEdgeTransformAndLower)] - assert ( - self.runner_util is not None - ), "self.tosa_test_util is not initialized, cannot use run_method()" assert ( edge_stage is not None ), "To compare outputs, at least the ToEdge or ToEdgeTransformAndLower stage needs to be run." 
@@ -298,29 +311,19 @@
         test_stage = self.stages[stage]
         is_quantized = self.is_quantized()
 
-        if target_board is None:
-            target_board = get_target_board(self.compile_spec)
-
-        exported_program = self.stages[self.stage_name(tester.Export)].artifact
-        edge_program = edge_stage.artifact.exported_program()
-
-        self.runner_util.init_run(
-            exported_program,
-            edge_program,
-            is_quantized,
-            target_board,
-        )
-
         if is_quantized:
             reference_stage = self.stages[self.stage_name(tester.Quantize)]
-            # bool output is quantized with none quantized output so allow
-            # self.runner_util.qp_output to be none
-            if self.runner_util.qp_output is not None:
-                quantization_scales = [qp.scale for qp in self.runner_util.qp_output]
         else:
-            quantization_scales = [None] * len(self.runner_util.output_nodes)
             reference_stage = self.stages[self.stage_name(InitialModel)]
 
+        exported_program = self.stages[self.stage_name(tester.Export)].artifact
+        output_nodes = get_output_nodes(exported_program)
+        output_qparams = get_output_quantization_params(output_nodes)
+
+        quantization_scales = [
+            getattr(qp, "scale", None) for qp in output_qparams.values()
+        ]
+
         logger.info(
             f"Comparing Stage '{self.stage_name(test_stage)}' with Stage '{self.stage_name(reference_stage)}'"
         )