@@ -172,29 +172,18 @@ def fuse_pt2(
     return converted_graph_module
 
 
-def quantize_pt2(
+# Note: quantizer is not optional here to force the user to supply a quantizer
+# and to make it more likely that consistency is maintained.
+def get_fake_quant_model(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
-    quantizer: Optional[CadenceQuantizer] = None,
+    quantizer: CadenceQuantizer,
     calibration_data: Optional[list[tuple[object, ...]]] = None,
     dump_graphs: bool = False,
-) -> ExportedProgram:
-    """
-    Trace, prepare, convert and fuse the model using the given quantizer.
-    If calibration data is provided, it will be used to calibrate the model. If
-    not, the inputs will be used for calibration instead, which is useful for
-    unit tests but should not be used for end-to-end use cases.
-    Returns a GraphModule with the quantized model.
-    Note: this function should not be called directly in general. Please use
-    quantize_and_export_to_executorch for most needs.
-    """
+) -> torch.fx.GraphModule:
     # Make the model inference mode by calling model.eval()
     model.eval()
 
-    # Instantiate the quantizer to CadenceQuantizer if not supplied
-    if not quantizer:
-        quantizer = CadenceDefaultQuantizer()
-
     program = trace(model, inputs, dump_graphs=dump_graphs)
 
     if dump_graphs:
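
With this change, `get_fake_quant_model` requires an explicit quantizer rather than silently defaulting. A minimal usage sketch, assuming `CadenceDefaultQuantizer` is importable from the Cadence AoT quantizer module and that `model`/`example_inputs` are placeholders for a real model and its example inputs:

```python
# Hypothetical usage sketch; the import path is assumed from the ExecuTorch tree.
from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer

quantizer = CadenceDefaultQuantizer()
fake_quant_gm = get_fake_quant_model(
    model,                # a torch.nn.Module
    example_inputs,       # tuple of example inputs used for tracing
    quantizer=quantizer,  # now required; no implicit default
)
# fake_quant_gm is a torch.fx.GraphModule with fake-quant (Q/DQ) ops inserted.
```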
@@ -214,6 +203,37 @@ def quantize_pt2(
 
     # Get converted graph module
     converted_gm = convert_pt2(prepared_gm, dump_graphs=dump_graphs)
+    return converted_gm
+
+
+def quantize_pt2(
+    model: torch.nn.Module,
+    inputs: tuple[object, ...],
+    quantizer: Optional[CadenceQuantizer] = None,
+    calibration_data: Optional[list[tuple[object, ...]]] = None,
+    dump_graphs: bool = False,
+) -> ExportedProgram:
+    """
+    Trace, prepare, convert and fuse the model using the given quantizer.
+    If calibration data is provided, it will be used to calibrate the model. If
+    not, the inputs will be used for calibration instead, which is useful for
+    unit tests but should not be used for end-to-end use cases.
+    Returns a GraphModule with the quantized model.
+    Note: this function should not be called directly in general. Please use
+    quantize_and_export_to_executorch for most needs.
+    """
+    # Instantiate the quantizer to CadenceQuantizer if not supplied
+    if not quantizer:
+        quantizer = CadenceDefaultQuantizer()
+
+    # Get the converted (aka fake quant) graph module
+    converted_gm = get_fake_quant_model(
+        model,
+        inputs,
+        quantizer=quantizer,
+        calibration_data=calibration_data,
+        dump_graphs=dump_graphs,
+    )
 
     # Get fused model
     fused_gm = fuse_pt2(converted_gm, quantizer)
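
The relocated `quantize_pt2` keeps its old quantizer-defaulting behavior and now delegates the trace/prepare/convert steps to `get_fake_quant_model` before fusing. A sketch of the end-to-end call its docstring describes, where `representative_batches` is a hypothetical iterable of calibration inputs:

```python
# Hypothetical usage sketch; quantizer defaults to CadenceDefaultQuantizer.
calibration_data = [(batch,) for batch in representative_batches]
quantized_program = quantize_pt2(
    model,
    example_inputs,
    calibration_data=calibration_data,  # omit only in unit tests
)
```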
@@ -237,7 +257,7 @@ def quantize_pt2(
     torch.ops.aten.angle.default,
     torch.ops.aten.rms_norm.default,
 ]
-TO_EDGE_PRESERVE_OPS: list[torch._ops.OpOverload, ...] = [
+TO_EDGE_PRESERVE_OPS: list[torch._ops.OpOverload] = [
     torch.ops.aten.rms_norm.default,
 ]
 
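
The last hunk fixes an invalid annotation: `list` takes a single element type, and the `Type, ...` form is only valid for variable-length tuples. An illustration of the distinction, not part of the patch:

```python
ops: list[torch._ops.OpOverload] = [torch.ops.aten.rms_norm.default]  # one element type
shape: tuple[int, ...] = (1, 3, 224, 224)  # `...` marks a variable-length tuple
```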