96 changes: 96 additions & 0 deletions .ci/scripts/test_llama_lora.sh
@@ -0,0 +1,96 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
retry cmake --preset llm \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release
cmake --build cmake-out -j9 --target install --config Release
}

cmake_build_llama_runner() {
echo "Building llama runner"
pushd extension/llm/tokenizers
echo "Updating tokenizers submodule"
git submodule update --init
popd
dir="examples/models/llama"
retry cmake \
-DBUILD_TESTING=OFF \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-Bcmake-out/${dir} \
${dir}
cmake --build cmake-out/${dir} -j9 --config Release
}

cleanup_files() {
echo "Deleting downloaded and generated files"
rm -rf "${DOWNLOADED_PATH}/"
rm result.txt
}

# Download model artifacts from HF Hub.
# Hosting in personal repo for now.
HF_MODEL_REPO="lucylq/llama3_1B_lora"
DOWNLOADED_PATH=$(
bash "$(dirname "${BASH_SOURCE[0]}")/download_hf_hub.sh" \
--model_id "${HF_MODEL_REPO}" \
--files "adapter_config.json" "adapter_model.pt" "consolidated.00.pth" "params.json" "tokenizer.model"
)
EXPORTED_MODEL_NAME="llama_3_2_1B_lora.pte"
# Export model.
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
base.params="${DOWNLOADED_PATH}/params.json" \
base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
model.use_kv_cache=true \
model.use_sdpa_with_kv_cache=true \
model.dtype_override="fp32" \
backend.xnnpack.enabled=true \
backend.xnnpack.extended_ops=true \
export.output_name="${EXPORTED_MODEL_NAME}"

# Build llama runner.
cmake_install_executorch_libraries
cmake_build_llama_runner

PROMPT="What happens if you eat watermelon seeds?"
# Run llama runner
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"

NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
cmake-out/examples/models/llama/llama_main --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT=$(cat result.txt)
EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"

if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
echo "Success"
cleanup_files
else
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
echo "Failure; results not the same"

cleanup_files
exit 1
fi
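The download step above pulls five artifacts (adapter config, adapter weights, base checkpoint, params, tokenizer) from the lucylq/llama3_1B_lora repo via .ci/scripts/download_hf_hub.sh. For orientation only, a rough Python equivalent using the huggingface_hub package (an assumption; the CI script itself shells out to download_hf_hub.sh):

from huggingface_hub import hf_hub_download

# Same repo and file list as the script. hf_hub_download returns the local
# cache path for each downloaded artifact.
repo_id = "lucylq/llama3_1B_lora"
files = [
    "adapter_config.json",
    "adapter_model.pt",
    "consolidated.00.pth",
    "params.json",
    "tokenizer.model",
]
local_paths = {name: hf_hub_download(repo_id=repo_id, filename=name) for name in files}
print(local_paths["adapter_model.pt"])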
30 changes: 30 additions & 0 deletions .github/workflows/pull.yml
@@ -687,6 +687,36 @@ jobs:
# run llama runner in eager mode
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh

test-llama-lora-linux:
name: test-llama-lora-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
fail-fast: false
with:
runner: linux.24xlarge
docker-image: ci-image:executorch-ubuntu-22.04-clang12
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job chooses to use base env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"

PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"

# Install llama requirements
bash examples/models/llama/install_requirements.sh

# Install a pinned nightly build of torchtune.
PYTHON_EXECUTABLE=python python -m pip install torchtune==0.7.0.dev20250730 --extra-index-url https://download.pytorch.org/whl/nightly/cpu

# Export the LoRA model and run it with the llama runner.
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh

test-mediatek-models-linux:
name: test-mediatek-models-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
6 changes: 5 additions & 1 deletion extension/llm/export/config/llm_config.py
@@ -60,7 +60,7 @@ class PreqMode(str, Enum):
@dataclass
class BaseConfig:
"""
Configurations specific to the model, e.g. whether its Qwen3 or Phi-4-mini,
Configurations specific to the model, e.g. whether it's Qwen3 or Phi-4-mini,
and are the minimal set of parameters needed to load the pretrained
eager model and its weights.

@@ -487,6 +487,10 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
llm_config.base.checkpoint = args.checkpoint
if hasattr(args, "checkpoint_dir"):
llm_config.base.checkpoint_dir = args.checkpoint_dir
if hasattr(args, "adapter_checkpoint"):
llm_config.base.adapter_checkpoint = args.adapter_checkpoint
if hasattr(args, "adapter_config"):
llm_config.base.adapter_config = args.adapter_config
if hasattr(args, "tokenizer_path"):
llm_config.base.tokenizer_path = args.tokenizer_path
if hasattr(args, "metadata"):
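A minimal sketch of how the new adapter fields flow from argparse into the config (hypothetical values; assumes the remaining from_args branches are hasattr-guarded like the ones in this hunk, and that the module is importable from the repo root as in the export command above):

import argparse

from extension.llm.export.config.llm_config import LlmConfig

# Only attributes present on the namespace are copied into the config,
# because each from_args branch checks hasattr first.
args = argparse.Namespace(
    checkpoint="consolidated.00.pth",
    params="params.json",
    adapter_checkpoint="adapter_model.pt",
    adapter_config="adapter_config.json",
    tokenizer_path="tokenizer.model",
)

llm_config = LlmConfig.from_args(args)
print(llm_config.base.adapter_checkpoint)  # adapter_model.pt
print(llm_config.base.adapter_config)      # adapter_config.json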