
Commit 88a34db

Update on "[ET-VK] 7/n Split dispatches between multiple command buffers. Split execute dispatch into multiple commands based on dispatch count."
Differential Revision: [D78360039](https://our.internmc.facebook.com/intern/diff/D78360039/) [ghstack-poisoned]
2 parents 0c2f76b + bdc7dc2 commit 88a34db

55 files changed: +2183, -1154 lines

.ci/scripts/test_llama_lora.sh

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+cmake_install_executorch_libraries() {
+  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
+  rm -rf cmake-out
+  retry cmake --preset llm \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release
+  cmake --build cmake-out -j9 --target install --config Release
+}
+
+cmake_build_llama_runner() {
+  echo "Building llama runner"
+  pushd extension/llm/tokenizers
+  echo "Updating tokenizers submodule"
+  git submodule update --init
+  popd
+  dir="examples/models/llama"
+  retry cmake \
+    -DBUILD_TESTING=OFF \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -Bcmake-out/${dir} \
+    ${dir}
+  cmake --build cmake-out/${dir} -j9 --config Release
+}
+
+cleanup_files() {
+  echo "Deleting downloaded and generated files"
+  rm -rf "${DOWNLOADED_PATH}/"
+  rm result.txt
+}
+
+# Download model artifacts from HF Hub.
+# Hosting in personal repo for now.
+HF_MODEL_REPO="lucylq/llama3_1B_lora"
+DOWNLOADED_PATH=$(
+  bash "$(dirname "${BASH_SOURCE[0]}")/download_hf_hub.sh" \
+    --model_id "${HF_MODEL_REPO}" \
+    --files "adapter_config.json" "adapter_model.pt" "consolidated.00.pth" "params.json" "tokenizer.model"
+)
+EXPORTED_MODEL_NAME="llama_3_2_1B_lora.pte"
+# Export model.
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
+  base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+  base.params="${DOWNLOADED_PATH}/params.json" \
+  base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+  base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+  base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+  model.use_kv_cache=true \
+  model.use_sdpa_with_kv_cache=true \
+  model.dtype_override="fp32" \
+  backend.xnnpack.enabled=true \
+  backend.xnnpack.extended_ops=true \
+  export.output_name="${EXPORTED_MODEL_NAME}"
+
+# Build llama runner.
+cmake_install_executorch_libraries
+cmake_build_llama_runner
+
+PROMPT="What happens if you eat watermelon seeds?"
+# Run llama runner
+RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
+
+NOW=$(date +"%H:%M:%S")
+echo "Starting to run llama runner at ${NOW}"
+# shellcheck source=/dev/null
+cmake-out/examples/models/llama/llama_main --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
+NOW=$(date +"%H:%M:%S")
+echo "Finished at ${NOW}"
+
+RESULT=$(cat result.txt)
+EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"
+
+if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT}"
+  echo "Success"
+  cleanup_files
+else
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT}"
+  echo "Failure; results not the same"
+
+  cleanup_files
+  exit 1
+fi
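
For local reproduction, a minimal sketch of how this script is driven, mirroring the CI job added to .github/workflows/pull.yml below; the requirement install and the torchtune pin are taken from that job, and an already set up ExecuTorch checkout is assumed.

# Sketch: run the LoRA export + runner smoke test from the repo root.
bash examples/models/llama/install_requirements.sh
python -m pip install torchtune==0.7.0.dev20250730 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh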

.github/workflows/pull.yml

Lines changed: 30 additions & 0 deletions
@@ -687,6 +687,36 @@ jobs:
         # run llama runner in eager mode
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
 
+  test-llama-lora-linux:
+    name: test-llama-lora-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # install a recent version of torchtune.
+        PYTHON_EXECUTABLE=python python -m pip install torchtune==0.7.0.dev20250730 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+
+        # run the llama LoRA export and runner test
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh
+
   test-mediatek-models-linux:
     name: test-mediatek-models-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -849,7 +849,7 @@ if(NOT EXECUTORCH_SELECT_OPS_YAML STREQUAL ""
     LIB_NAME
     "executorch_selected_kernels"
     OPS_SCHEMA_YAML
-    "${EXECUTORCH_SELECT_OPS_LIB}"
+    "${EXECUTORCH_SELECT_OPS_YAML}"
     ROOT_OPS
     "${EXECUTORCH_SELECT_OPS_LIST}"
     INCLUDE_ALL_OPS
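
The fix passes the variable that the surrounding if() actually checks. As a hedged sketch of how this path is exercised, assuming EXECUTORCH_SELECT_OPS_YAML holds the path to an ops schema YAML (the OPS_SCHEMA_YAML argument and the STREQUAL "" guard suggest this); the YAML path below is illustrative only.

# Hypothetical selective-build configure with a custom ops schema YAML.
cmake -S . -B cmake-out \
  -DEXECUTORCH_SELECT_OPS_YAML=/path/to/custom_ops.yaml \
  -DCMAKE_BUILD_TYPE=Release
cmake --build cmake-out -j9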

backends/apple/coreml/partition/coreml_partitioner.py

Lines changed: 80 additions & 27 deletions
@@ -20,6 +20,7 @@
     PartitionResult,
 )
 from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
+from executorch.exir.dialects._ops import ops as exir_ops
 from torch.export.exported_program import ExportedProgram
 from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
 from torch.fx.passes.operator_support import OperatorSupportBase
@@ -56,6 +57,80 @@ def log_once(self, msg: str) -> None:
         logger.info(msg)
         self._logged_msgs.add(msg)
 
+    def should_skip_op_for_delegation(self, node_target_name: str) -> bool:
+        skipped_ops = self.skip_ops_for_coreml_delegation or []
+        if node_target_name in skipped_ops:
+            assert (
+                not self.lower_full_graph
+            ), f"Cannot skip {node_target_name} because lower_full_graph is True. Please set skip_ops_for_coreml_delegation=None or lower_full_graph=False in the CoreMLPartitioner"
+            self.log_once(
+                "Skipping op for CoreML delegation because it is in skip_ops_for_coreml_delegation: "
+                + node_target_name
+            )
+            return True
+        return False
+
+    def should_override_support(self, node) -> bool:
+        # https://github.com/apple/coremltools/issues/2573
+        if (
+            node.target
+            in [
+                torch.ops.aten.sub.Tensor,
+                exir_ops.edge.aten.sub.Tensor,
+                torch.ops.aten.add.Tensor,
+                exir_ops.edge.aten.add.Tensor,
+            ]
+            and "alpha" in node.kwargs
+            and node.kwargs["alpha"] != 1
+        ):
+            self.log_once(
+                "torch.ops.aten.{sub, add}.Tensor with alpha != 1 is not supported by CoreML. Overriding support."
+            )
+            return True
+
+        # https://github.com/apple/coremltools/issues/2565
+        if node.target in [
+            torch.ops.aten.diagonal.default,
+            torch.ops.aten.diagonal_copy.default,
+            exir_ops.edge.aten.diagonal.default,
+            exir_ops.edge.aten.diagonal_copy.default,
+        ]:
+            self.log_once(
+                "torch.ops.aten.diagonal.default has a bug in CoreML. Overriding op support."
+            )
+            return True
+
+        # https://github.com/apple/coremltools/issues/2569
+        if node.target in [
+            torch.ops.aten.acosh.default,
+            exir_ops.edge.aten.acosh.default,
+            torch.ops.aten.asinh.default,
+            exir_ops.edge.aten.asinh.default,
+        ]:
+            self.log_once(
+                "torch.ops.aten.{acosh, asinh}.default is not supported by CoreML. Overriding op support."
+            )
+            return True
+
+        # TODO: enable this after bugs in ExecuTorch's partitioner are fixed
+        # # If lower_full_graph=False, do not partition nodes with symbolic args because it can result in symbolic args
+        # # in the placeholders due to partitioning, which CoreML does not support
+        # if not self.lower_full_graph and any(
+        #     isinstance(arg, torch.fx.Node)
+        #     and isinstance(
+        #         arg.meta.get("val", None),
+        #         (torch.SymInt, torch.SymBool, torch.SymFloat),
+        #     )
+        #     for arg in node.args
+        # ):
+        #     self.log_once(
+        #         "Skipping op for CoreML delegation because it contains symbolic args: "
+        #         + node_target_name
+        #     )
+        #     return True
+
+        return False
+
     def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
         # get_attr node can always be supported on any backend
         if node.op == "get_attr":
@@ -64,38 +139,17 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
         elif node.op == "call_function":
             # skip ops if specified by user
             node_target_name = getattr(node.target, "__name__", "").lower()
-            if node_target_name in (self.skip_ops_for_coreml_delegation or []):
-                self.log_once(
-                    "Skipping op for CoreML delegation because it is in skip_ops_for_coreml_delegation: "
-                    + node_target_name
-                )
-                assert (
-                    not self.lower_full_graph
-                ), "Cannot have skip_ops_for_coreml_delegation when lower_full_graph is True"
-                return False
 
-            # TODO: enable this after bugs in ExecuTorch's partitioner are fixed
-            # # If lower_full_graph=False, do not partition nodes with symbolic args because it can result in symbolic args
-            # # in the placeholders due to partitioning, which CoreML does not support
-            # if not self.lower_full_graph and any(
-            #     isinstance(arg, torch.fx.Node)
-            #     and isinstance(
-            #         arg.meta.get("val", None),
-            #         (torch.SymInt, torch.SymBool, torch.SymFloat),
-            #     )
-            #     for arg in node.args
-            # ):
-            #     self.log_once(
-            #         "Skipping op for CoreML delegation because it contains symbolic args: "
-            #         + node_target_name
-            #     )
-            #     assert not self.lower_full_graph
-            #     return False
+            if self.should_skip_op_for_delegation(node_target_name):
+                return False
 
             # query coremltools to see if node is supported
             is_supported = ct.converters.mil.frontend.torch.is_torch_fx_node_supported(
                 node
            )
+            if self.should_override_support(node):
+                is_supported = False
+
             if not is_supported:
                 if self.lower_full_graph:
                     raise NotImplementedError(
@@ -126,7 +180,6 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
 
 
 class CoreMLPartitioner(Partitioner):
-
     def __init__(
         self,
         *,
backends/apple/coreml/runtime/delegate/multiarray.mm

Lines changed: 3 additions & 0 deletions
@@ -123,6 +123,9 @@ bool init_bnns_descriptor(BNNSNDArrayDescriptor& bnns_descriptor, const MultiArr
 }
 
 bool copy_using_bnns(const MultiArray& src, MultiArray& dst) {
+    if (src.layout().dataType() != dst.layout().dataType()) {
+        return false;
+    }
     if (dst.layout().num_bytes() < src.layout().num_bytes()) {
         return false;
     }

backends/arm/test/ops/test_asinh.py

Lines changed: 12 additions & 12 deletions
@@ -9,10 +9,10 @@
 
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
-    EthosU55PipelineBI,
-    EthosU85PipelineBI,
-    TosaPipelineBI,
-    TosaPipelineMI,
+    EthosU55PipelineINT,
+    EthosU85PipelineINT,
+    TosaPipelineFP,
+    TosaPipelineINT,
 )
 
 input_t = Tuple[torch.Tensor]  # Input x
@@ -36,8 +36,8 @@ def forward(self, x):
 
 
 @common.parametrize("test_data", test_data_suite)
-def test_asin_tosa_MI(test_data: Tuple):
-    pipeline = TosaPipelineMI[input_t](
+def test_asinh_tosa_FP(test_data: Tuple):
+    pipeline = TosaPipelineFP[input_t](
         Asinh(),
         (test_data(),),
         aten_op,
@@ -47,8 +47,8 @@ def test_asin_tosa_MI(test_data: Tuple):
 
 
 @common.parametrize("test_data", test_data_suite)
-def test_asin_tosa_BI(test_data: Tuple):
-    pipeline = TosaPipelineBI[input_t](
+def test_asinh_tosa_INT(test_data: Tuple):
+    pipeline = TosaPipelineINT[input_t](
         Asinh(),
         (test_data(),),
         aten_op=[],
@@ -59,8 +59,8 @@ def test_asin_tosa_BI(test_data: Tuple):
 
 @common.parametrize("test_data", test_data_suite)
 @common.XfailIfNoCorstone300
-def test_asin_u55_BI(test_data: Tuple):
-    pipeline = EthosU55PipelineBI[input_t](
+def test_asinh_u55_INT(test_data: Tuple):
+    pipeline = EthosU55PipelineINT[input_t](
         Asinh(),
         (test_data(),),
         aten_ops=[],
@@ -70,8 +70,8 @@ def test_asin_u55_BI(test_data: Tuple):
 
 @common.parametrize("test_data", test_data_suite)
 @common.XfailIfNoCorstone320
-def test_asin_u85_BI(test_data: Tuple):
-    pipeline = EthosU85PipelineBI[input_t](
+def test_asinh_u85_INT(test_data: Tuple):
+    pipeline = EthosU85PipelineINT[input_t](
         Asinh(),
         (test_data(),),
         aten_ops=[],
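
The tests are renamed from asin to asinh and migrated to the FP/INT pipeline naming. A hedged sketch of running just the renamed TOSA variants; the Arm test suite may require additional environment setup (for example Corstone FVPs for the U55/U85 variants) that is not shown here.

# Hypothetical minimal invocation of the renamed asinh TOSA tests.
python -m pytest backends/arm/test/ops/test_asinh.py -k "test_asinh_tosa_FP or test_asinh_tosa_INT" -v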
