96 changes: 96 additions & 0 deletions .ci/scripts/test_llama_lora.sh
@@ -0,0 +1,96 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
retry cmake --preset llm \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release
cmake --build cmake-out -j9 --target install --config Release
}

cmake_build_llama_runner() {
echo "Building llama runner"
pushd extension/llm/tokenizers
echo "Updating tokenizers submodule"
git submodule update --init
popd
dir="examples/models/llama"
retry cmake \
-DBUILD_TESTING=OFF \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-Bcmake-out/${dir} \
${dir}
cmake --build cmake-out/${dir} -j9 --config Release
}

cleanup_files() {
echo "Deleting downloaded and generated files"
rm -rf "${DOWNLOADED_PATH}/"
rm result.txt
}

# Download model artifacts from HF Hub.
# Hosting in personal repo for now.
HF_MODEL_REPO="lucylq/llama3_1B_lora"
DOWNLOADED_PATH=$(
bash "$(dirname "${BASH_SOURCE[0]}")/download_hf_hub.sh" \
--model_id "${HF_MODEL_REPO}" \
--files "adapter_config.json" "adapter_model.pt" "consolidated.00.pth" "params.json" "tokenizer.model"
)
EXPORTED_MODEL_NAME="llama_3_2_1B_lora.pte"
# Export model.
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
base.params="${DOWNLOADED_PATH}/params.json" \
base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
model.use_kv_cache=true \
model.use_sdpa_with_kv_cache=true \
model.dtype_override="fp32" \
backend.xnnpack.enabled=true \
backend.xnnpack.extended_ops=true \
export.output_name="${EXPORTED_MODEL_NAME}"

# Build llama runner.
cmake_install_executorch_libraries
cmake_build_llama_runner

PROMPT="What happens if you eat watermelon seeds?"
# Run llama runner
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"

NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
cmake-out/examples/models/llama/llama_main --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT=$(cat result.txt)
EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"

if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
echo "Success"
cleanup_files
else
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
echo "Failure; results not the same"

cleanup_files
exit 1
fi
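The download step above pulls five artifacts (adapter config, adapter weights, base checkpoint, params, tokenizer) from the lucylq/llama3_1B_lora repo via .ci/scripts/download_hf_hub.sh. For orientation only, a rough Python equivalent using the huggingface_hub package (an assumption; the CI script itself shells out to download_hf_hub.sh):

from huggingface_hub import hf_hub_download

# Same repo and file list as the script. hf_hub_download returns the local
# cache path for each downloaded artifact.
repo_id = "lucylq/llama3_1B_lora"
files = [
    "adapter_config.json",
    "adapter_model.pt",
    "consolidated.00.pth",
    "params.json",
    "tokenizer.model",
]
local_paths = {name: hf_hub_download(repo_id=repo_id, filename=name) for name in files}
print(local_paths["adapter_model.pt"])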
30 changes: 30 additions & 0 deletions .github/workflows/pull.yml
@@ -687,6 +687,36 @@ jobs:
# run llama runner in eager mode
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh

test-llama-lora-linux:
name: test-llama-lora-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
strategy:
fail-fast: false
with:
runner: linux.24xlarge
docker-image: ci-image:executorch-ubuntu-22.04-clang12
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job chooses to use base env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"

PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"

# Install llama requirements
bash examples/models/llama/install_requirements.sh

# Install a pinned nightly build of torchtune.
PYTHON_EXECUTABLE=python python -m pip install torchtune==0.7.0.dev20250730 --extra-index-url https://download.pytorch.org/whl/nightly/cpu

# Export the LoRA model and run it with the llama runner.
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh

test-mediatek-models-linux:
name: test-mediatek-models-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
6 changes: 5 additions & 1 deletion extension/llm/export/config/llm_config.py
@@ -60,7 +60,7 @@ class PreqMode(str, Enum):
@dataclass
class BaseConfig:
"""
Configurations specific to the model, e.g. whether its Qwen3 or Phi-4-mini,
Configurations specific to the model, e.g. whether it's Qwen3 or Phi-4-mini,
and are the minimal set of parameters needed to load the pretrained
eager model and its weights.

@@ -487,6 +487,10 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
llm_config.base.checkpoint = args.checkpoint
if hasattr(args, "checkpoint_dir"):
llm_config.base.checkpoint_dir = args.checkpoint_dir
if hasattr(args, "adapter_checkpoint"):
llm_config.base.adapter_checkpoint = args.adapter_checkpoint
if hasattr(args, "adapter_config"):
llm_config.base.adapter_config = args.adapter_config
if hasattr(args, "tokenizer_path"):
llm_config.base.tokenizer_path = args.tokenizer_path
if hasattr(args, "metadata"):
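A minimal sketch of how the new adapter fields flow from argparse into the config (hypothetical values; assumes the remaining from_args branches are hasattr-guarded like the ones in this hunk, and that the module is importable from the repo root as in the export command above):

import argparse

from extension.llm.export.config.llm_config import LlmConfig

# Only attributes present on the namespace are copied into the config,
# because each from_args branch checks hasattr first.
args = argparse.Namespace(
    checkpoint="consolidated.00.pth",
    params="params.json",
    adapter_checkpoint="adapter_model.pt",
    adapter_config="adapter_config.json",
    tokenizer_path="tokenizer.model",
)

llm_config = LlmConfig.from_args(args)
print(llm_config.base.adapter_checkpoint)  # adapter_model.pt
print(llm_config.base.adapter_config)      # adapter_config.json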