pytorch
diff --git a/‎.ci/scripts/utils.sh‎
Lines changed: 3 additions & 3 deletions b/‎.ci/scripts/utils.sh‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/apple.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/apple.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 8 additions & 5 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎backends/apple/mps/setup.md‎
Lines changed: 1 addition & 1 deletion b/‎backends/apple/mps/setup.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/arm/_passes/fuse_quantized_activation_pass.py‎
Lines changed: 4 additions & 3 deletions b/‎backends/arm/_passes/fuse_quantized_activation_pass.py‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎backends/arm/test/misc/test_multiple_outputs.py‎
Lines changed: 46 additions & 1 deletion b/‎backends/arm/test/misc/test_multiple_outputs.py‎
Lines changed: 46 additions & 1 deletion
diff --git a/‎backends/arm/test/runner_utils.py‎
Lines changed: 42 additions & 37 deletions b/‎backends/arm/test/runner_utils.py‎
Lines changed: 42 additions & 37 deletions
diff --git a/‎backends/arm/test/tester/analyze_output_utils.py‎
Lines changed: 4 additions & 4 deletions b/‎backends/arm/test/tester/analyze_output_utils.py‎
Lines changed: 4 additions & 4 deletions
@@ -17,17 +17,17 @@ retry () {
 }
 
 clean_executorch_install_folders() {
-  ./install_requirements.sh --clean
+  ./install_executorch.sh --clean
 }
 
 install_executorch() {
   which pip
   # Install executorch, this assumes that Executorch is checked out in the
   # current directory.
   if [[ "${1:-}" == "use-pt-pinned-commit" ]]; then
-    ./install_requirements.sh --pybind xnnpack --use-pt-pinned-commit
+    ./install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
   else
-    ./install_requirements.sh --pybind xnnpack
+    ./install_executorch.sh --pybind xnnpack
   fi
   # Just print out the list of packages for debugging
   pip list
 
@@ -9,7 +9,7 @@ on:
     paths:
       - .ci/scripts/setup-ios.sh
       - .github/workflows/apple.yml
-      - install_requirements.sh
+      - install_executorch.sh
       - backends/apple/**
       - build/build_apple_frameworks.sh
       - build/build_apple_llm_demo.sh
 
@@ -200,7 +200,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install Llava requirements
         bash examples/models/llama/install_requirements.sh
@@ -333,6 +333,9 @@ jobs:
 
   unittest-arm:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -433,7 +436,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install phi-3-mini requirements
         bash examples/models/phi-3-mini/install_requirements.sh
@@ -460,7 +463,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install llama requirements
         bash examples/models/llama/install_requirements.sh
@@ -487,7 +490,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install llama requirements
         bash examples/models/llama/install_requirements.sh
@@ -514,7 +517,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install llama requirements
         bash examples/models/llama/install_requirements.sh
 
@@ -132,6 +132,9 @@ jobs:
   test-arm-backend-delegation:
     name: test-arm-backend-delegation
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -159,6 +162,9 @@ jobs:
   test-arm-reference-delegation:
     name: test-arm-reference-delegation
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
 
@@ -97,7 +97,7 @@ I 00:00:00.122615 executorch:mps_executor_runner.mm:501] Model verified successf
 ### [Optional] Run the generated model directly using pybind
 1. Make sure `pybind` MPS support was installed:
 ```bash
-./install_requirements.sh --pybind mps
+./install_executorch.sh --pybind mps
 ```
 2. Run the `mps_example` script to trace the model and run it directly from python:
 ```bash
 
@@ -19,12 +19,13 @@ def _is_fuseable_quantized_activation(self, node: Node):
             is_fuseable = min_val == 0
 
         is_quantized = len(node.users) == 1 and next(iter(node.users)).target == q_op
-        if is_quantized:
+        if is_fuseable and is_quantized:
             quant_node = next(iter(node.users))
             zp = quant_node.args[2]
             qmin = quant_node.args[3]
-
-        return is_fuseable and is_quantized and zp == qmin
+            return zp == qmin
+        else:
+            return False
 
     def _is_fuseable_input(self, node: Node):
         return (
 
@@ -6,9 +6,11 @@
 
 import unittest
 
+import pytest
 import torch
-from executorch.backends.arm.test import common
+from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.exir.backend.compile_spec_schema import CompileSpec
 
 
 class TestMultipleOutputs(unittest.TestCase):
@@ -51,3 +53,46 @@ def test_tosa_BI_pipeline(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=inputs, qtol=1.0)
         )
+
+    def _test_ethosu_BI_pipeline(
+        self,
+        module: torch.nn.Module,
+        test_data: tuple[torch.Tensor],
+        compile_spec: CompileSpec,
+    ):
+        tester = (
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=compile_spec,
+            )
+            .quantize()
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .serialize()
+        )
+        if conftest.is_option_enabled("corstone_fvp"):
+            tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
+
+    @pytest.mark.corstone_fvp
+    def test_u85_BI(self):
+        module = self.MultipleOutputsModule()
+        test_data = module.get_inputs()
+        self._test_ethosu_BI_pipeline(
+            module,
+            test_data,
+            common.get_u85_compile_spec(),
+        )
+
+    @pytest.mark.corstone_fvp
+    @conftest.expectedFailureOnFVP
+    # TODO MLETORCH-598
+    def test_u55_BI(self):
+        module = self.MultipleOutputsModule()
+        test_data = module.get_inputs()
+        self._test_ethosu_BI_pipeline(
+            module,
+            test_data,
+            common.get_u55_compile_spec(),
+        )
@@ -115,50 +115,53 @@ def _get_input_quantization_params(
     return quant_params
 
 
-def _get_output_node(program: ExportedProgram) -> Node:
+def _get_output_nodes(program: ExportedProgram) -> list[Node]:
     """
     Get output node to this model.
 
     Args:
-        program (ExportedProgram): The program to get output node from.
+        program (ExportedProgram): The program to get the output nodes from.
     Returns:
-        The node that is the output of 'program'.
+        The nodes that are the outputs of the 'program'.
     """
-
+    output_nodes = []
     for node in program.graph.nodes:
         if node.op == "output":
-            return node
-    raise RuntimeError("No output node found.")
+            for output in node.args[0]:
+                output_nodes.append(output)
+    if len(output_nodes) == 0:
+        raise RuntimeError("No output nodes found.")
+    else:
+        return output_nodes
 
 
 def _get_output_quantization_params(
-    program: ExportedProgram, output_node: Node
-) -> Optional[QuantizationParams]:
+    output_nodes: list[Node],
+) -> List[QuantizationParams]:
     """
     Get output QuantizationParams from a program.
     Args:
-        program (ExportedProgram): The program to get output quantization parameters from.
+        output_nodes (list[Node]): A list of output nodes to get output quantization parameters from.
     Returns:
         QuantizationParams: The found quantization parameters.
     Raises:
         RuntimeError if no output quantization parameters are found.
     """
-
-    quant_params = None
-    for node in program.graph.nodes:
-        if (
-            node.target == torch.ops.quantized_decomposed.dequantize_per_tensor.default
-            and node == output_node.args[0][0]
-        ):
-            quant_params = QuantizationParams(
-                node_name=node.args[0].name,
-                scale=node.args[1],
-                zp=node.args[2],
-                qmin=node.args[3],
-                qmax=node.args[4],
-                dtype=node.args[5],
+    quant_params = []
+    for node in output_nodes:
+        if node.target == torch.ops.quantized_decomposed.dequantize_per_tensor.default:
+            quant_params.append(
+                QuantizationParams(
+                    node_name=node.args[0].name,
+                    scale=node.args[1],
+                    zp=node.args[2],
+                    qmin=node.args[3],
+                    qmax=node.args[4],
+                    dtype=node.args[5],
+                )
             )
-            break  # break early, there's only one output node
+    if len(quant_params) == 0:
+        raise RuntimeError("No quantization parameters found in exported model.")
     return quant_params
 
 
@@ -211,7 +214,7 @@ def __init__(
         self.input_names: list[str] = None
         self.output_name: str = None
         self.qp_input: list[QuantizationParams] = None
-        self.qp_output: QuantizationParams = None
+        self.qp_output: list[QuantizationParams] = None
         self.timeout = 480
         self.target_board: str = None
 
@@ -226,19 +229,17 @@ def init_run(
     ):
 
         self.input_names = _get_input_names(edge_program)
-        self.output_node = _get_output_node(exported_program)
-        self.output_name = self.output_node.name
+        self.output_nodes = _get_output_nodes(exported_program)
+
         self.is_quantized = is_quantized
         self.target_board = target_board
 
         if is_quantized:
             self.qp_input = _get_input_quantization_params(exported_program)
-            self.qp_output = _get_output_quantization_params(
-                exported_program, self.output_node
-            )
+            self.qp_output = _get_output_quantization_params(self.output_nodes)
         else:
             self.qp_input = [None] * len(self.input_names)
-            self.qp_output = None
+            self.qp_output = [None] * len(self.output_nodes)
 
         self._has_init_run = True
 
@@ -265,7 +266,7 @@ def run_corstone(
             save_bytes(self.intermediate_path, data, False, input_name, quant_param)
 
         out_path = os.path.join(self.intermediate_path, "out")
-        out_path_with_suffix = out_path + "-0.bin"
+
         input_paths = []
         for name in self.input_names:
             input_paths.append(
@@ -281,6 +282,7 @@ def run_corstone(
         ), f"Did not find build arm_executor_runner in path {elf_path}, run setup_testing.sh?"
 
         cmd_line = f"executor_runner -m {pte_path} -o {out_path}"
+
         for input_path in input_paths:
             cmd_line += f" -i {input_path}"
 
@@ -362,11 +364,14 @@ def run_corstone(
             raise RuntimeError(
                 f"Corstone simulation failed:\ncmd: {command_args[self.target_board]}\n, log: \n {result_stdout}\n{result.stderr.decode()}"
             )
-
-        tosa_ref_output = np.fromfile(out_path_with_suffix, dtype=np.float32)
-        output_shape = self.output_node.args[0][0].meta["val"].shape
-        tosa_ref_output = torch.from_numpy(tosa_ref_output).reshape(output_shape)
-        return tosa_ref_output
+        output_np = []
+        for i, node in enumerate(self.output_nodes):
+            tosa_ref_output = np.fromfile(
+                os.path.join(self.intermediate_path, f"out-{i}.bin"), dtype=np.float32
+            )
+            output_shape = node.meta["val"].shape
+            output_np.append(torch.from_numpy(tosa_ref_output).reshape(output_shape))
+        return tuple(output_np)
 
     def run_tosa_graph(
         self, graph: TosaGraph, inputs: list[np.ndarray] | list[torch.Tensor]
 
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -9,7 +9,7 @@
 import torch
 from executorch.backends.arm.test.runner_utils import (
     _get_input_quantization_params,
-    _get_output_node,
+    _get_output_nodes,
     _get_output_quantization_params,
 )
 
@@ -228,9 +228,9 @@ def dump_error_output(
     export_stage = tester.stages.get(tester.stage_name(Export), None)
     quantize_stage = tester.stages.get(tester.stage_name(Quantize), None)
     if export_stage is not None and quantize_stage is not None:
-        output_node = _get_output_node(export_stage.artifact)
+        output_nodes = _get_output_nodes(export_stage.artifact)
         qp_input = _get_input_quantization_params(export_stage.artifact)
-        qp_output = _get_output_quantization_params(export_stage.artifact, output_node)
+        qp_output = _get_output_quantization_params(output_nodes)
         logger.error(f"Input QuantArgs: {qp_input}")
         logger.error(f"Output QuantArgs: {qp_output}")