95 changes: 95 additions & 0 deletions .ci/scripts/test_llama_lora.sh
@@ -0,0 +1,95 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
retry cmake --preset llm \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release
cmake --build cmake-out -j9 --target install --config Release
}

cmake_build_llama_runner() {
echo "Building llama runner"
pushd extension/llm/tokenizers
echo "Updating tokenizers submodule"
git submodule update --init
popd
dir="examples/models/llama"
retry cmake \
-DBUILD_TESTING=OFF \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-Bcmake-out/${dir} \
${dir}
cmake --build cmake-out/${dir} -j9 --config Release
}

cleanup_files() {
echo "Deleting downloaded and generated files"
rm -rf "${DOWNLOADED_PATH}/"
rm result.txt
}

# Download model artifacts from HF Hub.
# Hosting in personal repo for now.
HF_MODEL_REPO="lucylq/llama3_1B_lora"
DOWNLOADED_PATH=$(
bash "$(dirname "${BASH_SOURCE[0]}")/download_hf_hub.sh" \
--model_id "${HF_MODEL_REPO}" \
--files "adapter_config.json" "adapter_model.pt" "consolidated.00.pth" "params.json" "tokenizer.model"
)
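# NOTE (assumption): download_hf_hub.sh is expected to print the local download
# directory on stdout; it is captured here and reused for the export inputs and cleanup.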

# Export model.
EXPORTED_MODEL_NAME="llama3_2_1B_lora.pte"
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
base.params="${DOWNLOADED_PATH}/params.json" \
base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
model.use_kv_cache=true \
model.use_sdpa_with_kv_cache=true \
model.dtype_override="fp32" \
backend.xnnpack.enabled=true \
backend.xnnpack.extended_ops=true \
export.output_name="${EXPORTED_MODEL_NAME}"

# Build llama runner.
cmake_install_executorch_libraries
cmake_build_llama_runner

# Run llama runner
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --prompt='What happens if you eat watermelon seeds?' --temperature=0 --seq_len=10 --warmup=1"
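# temperature=0 makes decoding greedy (deterministic) and seq_len=10 bounds generation,
# so the output can be compared against a fixed expected prefix below.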

NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck disable=SC2086  # RUNTIME_ARGS is intentionally word-split.
cmake-out/examples/models/llama/llama_main ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT=$(cat result.txt)
EXPECTED_PREFIX="What happens if you eat watermelon seeds? Eating watermelon seeds can be a bit tricky,"

if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
echo "Success"
cleanup_files
else
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
echo "Failure; results not the same"

cleanup_files
exit 1
fi
27 changes: 27 additions & 0 deletions .github/workflows/pull.yml
@@ -687,6 +687,33 @@ jobs:
# run llama runner in eager mode
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh

test-llama-lora-linux:
name: test-llama-lora-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
fail-fast: false
with:
runner: linux.24xlarge
docker-image: ci-image:executorch-ubuntu-22.04-clang12
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"

PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"

# Install llama requirements
bash examples/models/llama/install_requirements.sh

# Export LoRA model and run llama runner
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh

test-mediatek-models-linux:
name: test-mediatek-models-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
6 changes: 5 additions & 1 deletion extension/llm/export/config/llm_config.py
@@ -60,7 +60,7 @@ class PreqMode(str, Enum):
@dataclass
class BaseConfig:
"""
Configurations specific to the model, e.g. whether its Qwen3 or Phi-4-mini,
Configurations specific to the model, e.g. whether it's Qwen3 or Phi-4-mini,
and are the minimal set of parameters needed to load the pretrained
eager model and its weights.
@@ -487,6 +487,10 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
llm_config.base.checkpoint = args.checkpoint
if hasattr(args, "checkpoint_dir"):
llm_config.base.checkpoint_dir = args.checkpoint_dir
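# Adapter (LoRA) checkpoint/config are copied only when present on the parsed args.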
if hasattr(args, "adapter_checkpoint"):
llm_config.base.adapter_checkpoint = args.adapter_checkpoint
if hasattr(args, "adapter_config"):
llm_config.base.adapter_config = args.adapter_config
if hasattr(args, "tokenizer_path"):
llm_config.base.tokenizer_path = args.tokenizer_path
if hasattr(args, "metadata"):