pytorch
diff --git a/‎.ci/scripts/test_phi_3_mini.sh‎
Lines changed: 116 additions & 0 deletions b/‎.ci/scripts/test_phi_3_mini.sh‎
Lines changed: 116 additions & 0 deletions
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 27 additions & 0 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 6 additions & 1 deletion b/‎CMakeLists.txt‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎backends/arm/arm_backend.py‎
Lines changed: 2 additions & 2 deletions b/‎backends/arm/arm_backend.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/test/models/test_mobilenet_v2_arm.py‎
Lines changed: 15 additions & 0 deletions b/‎backends/arm/test/models/test_mobilenet_v2_arm.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎backends/arm/test/ops/test_add.py‎
Lines changed: 33 additions & 6 deletions b/‎backends/arm/test/ops/test_add.py‎
Lines changed: 33 additions & 6 deletions
diff --git a/‎backends/arm/test/ops/test_avg_pool.py‎
Lines changed: 25 additions & 5 deletions b/‎backends/arm/test/ops/test_avg_pool.py‎
Lines changed: 25 additions & 5 deletions
diff --git a/‎backends/arm/test/ops/test_bmm.py‎
Lines changed: 17 additions & 4 deletions b/‎backends/arm/test/ops/test_bmm.py‎
Lines changed: 17 additions & 4 deletions
@@ -0,0 +1,116 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Abort on any failing command (-e) or unset variable (-u), and trace every
+# command as it runs (-x).
+set -exu
+
+# Positional arguments: $1 is the CMake build type (default Debug) and $3 is
+# the build/install directory (default cmake-out).
+# NOTE(review): $2 is never read by this script - confirm that skipping it is
+# intentional and not an off-by-one on the BUILD_DIR argument.
+BUILD_TYPE=${1:-Debug}
+BUILD_DIR=${3:-cmake-out}
+# Source directory of the phi-3-mini example, relative to the current directory.
+MODEL_DIR=examples/models/phi-3-mini
+
+echo "Building with BUILD_TYPE: $BUILD_TYPE, BUILD_DIR: $BUILD_DIR"
+
+# Fall back to python3 when the caller did not provide PYTHON_EXECUTABLE.
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+    PYTHON_EXECUTABLE=python3
+fi
+
+# Number of processes for a parallel build: 8 unless the nproc command is
+# available, in which case its output is used.
+NPROC=8
+if hash nproc &> /dev/null; then NPROC=$(nproc); fi
+
+# Configure, build and install the ExecuTorch libraries into ${BUILD_DIR}.
+#
+# Reads the globals PYTHON_EXECUTABLE, BUILD_DIR, BUILD_TYPE and NPROC. The
+# configure step turns on logging, the data-loader/module/tensor extensions,
+# XNNPACK, and the quantized, optimized and custom kernel options.
+cmake_install_executorch_libraries() {
+  # Pass the interpreter chosen at the top of the script instead of a
+  # hard-coded "python", so this step agrees with cmake_build_phi_3_mini and
+  # still works when only python3 is on PATH. Expansions are quoted so that
+  # values containing spaces are not word-split.
+  cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \
+      -DCMAKE_INSTALL_PREFIX="${BUILD_DIR}" \
+      -DEXECUTORCH_ENABLE_LOGGING=1 \
+      -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
+      -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+      -DEXECUTORCH_BUILD_XNNPACK=ON \
+      -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+      -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+      -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+      -B"${BUILD_DIR}" .
+
+  # Build and install with NPROC parallel jobs.
+  cmake --build "${BUILD_DIR}" -j"${NPROC}" --target install --config "${BUILD_TYPE}"
+}
+
+# Configure and build the phi-3-mini example found in ${MODEL_DIR}.
+#
+# Reads the globals PYTHON_EXECUTABLE, BUILD_DIR, BUILD_TYPE, MODEL_DIR and
+# NPROC. The build tree is ${BUILD_DIR}/${MODEL_DIR}; CMAKE_INSTALL_PREFIX is
+# set to ${BUILD_DIR}.
+cmake_build_phi_3_mini() {
+  local runner_build_dir="${BUILD_DIR}/${MODEL_DIR}"
+
+  # Every expansion is quoted so that a build directory or interpreter path
+  # containing spaces or glob characters reaches cmake as a single argument.
+  cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \
+      -DCMAKE_INSTALL_PREFIX="${BUILD_DIR}" \
+      -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
+      -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+      -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+      -DEXECUTORCH_BUILD_XNNPACK=ON \
+      -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+      -B"${runner_build_dir}" \
+      "${MODEL_DIR}"
+
+  cmake --build "${runner_build_dir}" -j"${NPROC}" --config "${BUILD_TYPE}"
+}
+
+# Fetch tokenizer.model with wget, then convert it to tokenizer.bin using the
+# executorch.extension.llm.tokenizer.tokenizer module.
+prepare_tokenizer() {
+  local tokenizer_url="https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true"
+  local tokenizer_model=tokenizer.model
+  local tokenizer_bin=tokenizer.bin
+
+  echo "Downloading and converting tokenizer.model"
+  wget -O "${tokenizer_model}" "${tokenizer_url}"
+  $PYTHON_EXECUTABLE -m executorch.extension.llm.tokenizer.tokenizer \
+      -t "${tokenizer_model}" \
+      -o "${tokenizer_bin}"
+}
+
+# Run the phi-3-mini export module, writing its output to phi-3-mini.pte
+export_phi_3_mini () {
+  local export_module=executorch.examples.models.phi-3-mini.export_phi-3-mini
+
+  echo "Exporting phi-3-mini. This will take a few minutes"
+  $PYTHON_EXECUTABLE -m "${export_module}" \
+      -c "4k" \
+      -s 128 \
+      -o phi-3-mini.pte
+}
+
+# Run the phi-3-mini runner on a fixed prompt and check the generated text.
+#
+# Requires phi-3-mini.pte and tokenizer.bin in the current directory and the
+# runner binary at ${BUILD_DIR}/${MODEL_DIR}/phi_3_mini_runner. The runner's
+# stdout is saved to result.txt. Always terminates the script: exit status 0
+# when the output contains the expected sentence, 1 when it does not or when
+# one of the two input files is missing.
+run_and_verify() {
+    # Keep the working variables local so they do not leak into the script's
+    # global scope. Declared separately from the assignments so that a failing
+    # command substitution is still caught by `set -e`.
+    local now result expected_result runner
+
+    now=$(date +"%H:%M:%S")
+    echo "Starting to run phi-3-mini runner at ${now}"
+    if [[ ! -f "phi-3-mini.pte" ]]; then
+        echo "Export failed. Abort"
+        exit 1
+    fi
+    if [[ ! -f "tokenizer.bin" ]]; then
+        echo "tokenizer.bin is missing."
+        exit 1
+    fi
+
+    # Quoted so that a BUILD_DIR containing spaces is not split into words.
+    runner="${BUILD_DIR}/${MODEL_DIR}/phi_3_mini_runner"
+    "${runner}" \
+    --model_path=phi-3-mini.pte \
+    --tokenizer_path=tokenizer.bin \
+    --seq_len=128 \
+    --temperature=0 \
+    --prompt="<|system|>
+You are a helpful assistant.<|end|>
+<|user|>
+What is the capital of France?<|end|>
+<|assistant|>" > result.txt
+
+    # verify result.txt
+    result=$(<result.txt)
+    expected_result="The capital of France is Paris."
+    echo "Expected result prefix: ${expected_result}"
+    echo "Actual result: ${result}"
+    # Substring match: the expected sentence may appear anywhere in the output.
+    if [[ "${result}" == *"${expected_result}"* ]]; then
+        echo "Success"
+        exit 0
+    fi
+    echo "Failure; results not the same"
+    exit 1
+}
+
+# Step 1. Build ExecuTorch and phi-3-mini runner
+cmake_install_executorch_libraries
+cmake_build_phi_3_mini
+
+# Step 2. Export the tokenizer and model
+prepare_tokenizer
+export_phi_3_mini
+
+# Step 3. Run and verify result
+run_and_verify
@@ -414,3 +414,30 @@ jobs:
         PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
         # Test llama2
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+
+  # End-to-end phi-3-mini job: activates a conda env, runs the Linux setup and
+  # requirement installers, then runs .ci/scripts/test_phi_3_mini.sh.
+  test-phi-3-mini-runner-linux:
+    name: test-phi-3-mini-runner-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      # Check out the PR head commit for pull_request events, otherwise github.sha.
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install phi-3-mini requirements
+        bash examples/models/phi-3-mini/install_requirements.sh
+
+        # run e2e (export, tokenizer and runner)
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh
@@ -680,11 +680,16 @@ if(EXECUTORCH_BUILD_PYBIND)
       etdump
       executorch
       extension_data_loader
-      portable_ops_lib
       util
       torch
   )
 
+  # Append exactly one ops library to _dep_libs: optimized_native_cpu_ops_lib
+  # when EXECUTORCH_BUILD_KERNELS_OPTIMIZED is set, portable_ops_lib otherwise.
+  if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
+    list(APPEND _dep_libs optimized_native_cpu_ops_lib)
+  else()
+    list(APPEND _dep_libs portable_ops_lib)
+  endif()
+
   if(EXECUTORCH_BUILD_COREML)
     list(APPEND _dep_libs coremldelegate)
   endif()
 
@@ -52,8 +52,8 @@ def __init__(self):
     def ethosu_compile_spec(
         self,
         config: str,
-        system_config: Optional[str] = None,
-        memory_mode: Optional[str] = None,
+        system_config: str,
+        memory_mode: str,
         extra_flags: Optional[str] = None,
         config_ini: Optional[str] = "Arm/vela.ini",
     ) -> "ArmCompileSpecBuilder":
 
@@ -102,3 +102,18 @@ def test_mv2_u55_BI(self):
             tester.run_method_and_compare_outputs(
                 atol=1.0, qtol=1, inputs=self.model_inputs
             )
+
+    def test_mv2_u85_BI(self):
+        (
+            ArmTester(
+                self.mv2,
+                example_inputs=self.model_inputs,
+                compile_spec=common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            )
+            .quantize()
+            .export()
+            .to_edge(config=self._edge_compile_config)
+            .check(list(self.operators_after_quantization))
+            .partition()
+            .to_executorch()
+        )
@@ -13,6 +13,7 @@
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 from executorch.exir import EdgeCompileConfig
+from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
 
@@ -92,16 +93,17 @@ def _test_add_tosa_BI_pipeline(
             .run_method_and_compare_outputs(inputs=test_data, qtol=1)
         )
 
-    def _test_add_u55_BI_pipeline(
+    def _test_add_ethos_BI_pipeline(
         self,
         module: torch.nn.Module,
+        compile_spec: CompileSpec,
         test_data: Tuple[torch.Tensor],
     ):
         tester = (
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+                compile_spec=compile_spec,
             )
             .quantize()
             .export()
@@ -114,8 +116,7 @@ def _test_add_u55_BI_pipeline(
             .serialize()
         )
 
-        if common.is_option_enabled("corstone300"):
-            tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
+        return tester
 
     @parameterized.expand(Add.test_parameters)
     def test_add_tosa_MI(self, test_data: torch.Tensor):
@@ -130,7 +131,22 @@ def test_add_tosa_BI(self, test_data: torch.Tensor):
     @parameterized.expand(Add.test_parameters)
     def test_add_u55_BI(self, test_data: torch.Tensor):
+        # Run Add through the shared Ethos BI pipeline with the U55 compile
+        # spec; outputs are compared only when "corstone300" is enabled.
         test_data = (test_data,)
-        self._test_add_u55_BI_pipeline(self.Add(), test_data)
+        tester = self._test_add_ethos_BI_pipeline(
+            self.Add(),
+            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            test_data,
+        )
+        if common.is_option_enabled("corstone300"):
+            tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
+
+    @parameterized.expand(Add.test_parameters)
+    def test_add_u85_BI(self, test_data: torch.Tensor):
+        test_data = (test_data,)
+        self._test_add_ethos_BI_pipeline(
+            self.Add(),
+            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            test_data,
+        )
 
     @parameterized.expand(Add2.test_parameters)
     def test_add2_tosa_MI(self, operand1: torch.Tensor, operand2: torch.Tensor):
@@ -145,4 +161,15 @@ def test_add2_tosa_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
     @parameterized.expand(Add2.test_parameters)
     def test_add2_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
+        # Run Add2 through the shared Ethos BI pipeline with the U55 compile
+        # spec; outputs are compared only when "corstone300" is enabled.
+        # NOTE(review): the removed _test_add_u55_BI_pipeline always used
+        # get_u55_compile_spec(permute_memory_to_nhwc=True); this call uses
+        # the default arguments - confirm the change is intended.
         test_data = (operand1, operand2)
-        self._test_add_u55_BI_pipeline(self.Add2(), test_data)
+        tester = self._test_add_ethos_BI_pipeline(
+            self.Add2(), common.get_u55_compile_spec(), test_data
+        )
+        if common.is_option_enabled("corstone300"):
+            tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
+
+    @parameterized.expand(Add2.test_parameters)
+    def test_add2_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
+        test_data = (operand1, operand2)
+        self._test_add_ethos_BI_pipeline(
+            self.Add2(), common.get_u85_compile_spec(), test_data
+        )
@@ -13,6 +13,7 @@
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.exir.backend.backend_details import CompileSpec
 from parameterized import parameterized
 
 logger = logging.getLogger(__name__)
@@ -86,14 +87,17 @@ def _test_avgpool2d_tosa_BI_pipeline(
             .run_method_and_compare_outputs(inputs=test_data, qtol=1)
         )
 
-    def _test_avgpool2d_tosa_u55_BI_pipeline(
-        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
+    def _test_avgpool2d_tosa_ethos_BI_pipeline(
+        self,
+        module: torch.nn.Module,
+        compile_spec: CompileSpec,
+        test_data: Tuple[torch.tensor],
     ):
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+                compile_spec=compile_spec,
             )
             .quantize()
             .export()
@@ -141,6 +145,22 @@ def test_avgpool2d_tosa_u55_BI(
         test_data: torch.Tensor,
         model_params: int | Tuple[int, int],
     ):
-        self._test_avgpool2d_tosa_u55_BI_pipeline(
-            self.AvgPool2d(*model_params), (test_data,)
+        self._test_avgpool2d_tosa_ethos_BI_pipeline(
+            self.AvgPool2d(*model_params),
+            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            (test_data,),
+        )
+
+    @parameterized.expand(test_data_suite)
+    @unittest.expectedFailure
+    def test_avgpool2d_tosa_u85_BI(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        # Run AvgPool2d(*model_params) through the Ethos BI pipeline with the
+        # U85 compile spec (permute_memory_to_nhwc=True).
+        # NOTE(review): marked expectedFailure, but the reason is not recorded
+        # here - confirm and document why U85 is expected to fail.
+        self._test_avgpool2d_tosa_ethos_BI_pipeline(
+            self.AvgPool2d(*model_params),
+            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            (test_data,),
         )
@@ -11,6 +11,7 @@
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
 torch.manual_seed(1)
@@ -83,14 +84,17 @@ def _test_bmm_tosa_BI_pipeline(
             .run_method_and_compare_outputs(inputs=test_data)
         )
 
-    def _test_bmm_u55_BI_pipeline(
-        self, module: torch.nn.Module, test_data: Tuple[torch.Tensor, ...]
+    def _test_bmm_ethosu_BI_pipeline(
+        self,
+        module: torch.nn.Module,
+        compile_spec: CompileSpec,
+        test_data: Tuple[torch.Tensor, ...],
     ):
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_u55_compile_spec(),
+                compile_spec=compile_spec,
             )
             .quantize()
             .export()
@@ -132,4 +136,13 @@ def test_bmm_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
     @unittest.expectedFailure
     def test_bmm_single_input_u55_BI(self, operand1: torch.Tensor):
         test_data = (operand1,)
-        self._test_bmm_u55_BI_pipeline(self.BMMSingleInput(), test_data)
+        self._test_bmm_ethosu_BI_pipeline(
+            self.BMMSingleInput(), common.get_u55_compile_spec(), test_data
+        )
+
+    @parameterized.expand(BMMSingleInput.test_parameters)
+    def test_bmm_single_input_u85_BI(self, operand1: torch.Tensor):
+        test_data = (operand1,)
+        self._test_bmm_ethosu_BI_pipeline(
+            self.BMMSingleInput(), common.get_u85_compile_spec(), test_data
+        )