diff --git a/.ci/scripts/test_llama_lora.sh b/.ci/scripts/test_llama_lora.sh
new file mode 100644
index 00000000000..5c87cb8da72
--- /dev/null
+++ b/.ci/scripts/test_llama_lora.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+cmake_install_executorch_libraries() {
+    echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
+    rm -rf cmake-out
+    retry cmake --preset llm \
+        -DCMAKE_INSTALL_PREFIX=cmake-out \
+        -DCMAKE_BUILD_TYPE=Release
+    cmake --build cmake-out -j9 --target install --config Release
+}
+
+cmake_build_llama_runner() {
+    echo "Building llama runner"
+    pushd extension/llm/tokenizers
+    echo "Updating tokenizers submodule"
+    git submodule update --init
+    popd
+    dir="examples/models/llama"
+    retry cmake \
+        -DBUILD_TESTING=OFF \
+        -DCMAKE_INSTALL_PREFIX=cmake-out \
+        -DCMAKE_BUILD_TYPE=Release \
+        -Bcmake-out/${dir} \
+        ${dir}
+    cmake --build cmake-out/${dir} -j9 --config Release
+}
+
+cleanup_files() {
+    echo "Deleting downloaded and generated files"
+    rm -rf "${DOWNLOADED_PATH}/"
+    rm result.txt
+}
+
+# Download model artifacts from HF Hub.
+# Hosting in personal repo for now.
+HF_MODEL_REPO="lucylq/llama3_1B_lora"
+DOWNLOADED_PATH=$(
+    bash "$(dirname "${BASH_SOURCE[0]}")/download_hf_hub.sh" \
+        --model_id "${HF_MODEL_REPO}" \
+        --files "adapter_config.json" "adapter_model.pt" "consolidated.00.pth" "params.json" "tokenizer.model"
+)
+EXPORTED_MODEL_NAME="llama_3_2_1B_lora.pte"
+# Export model.
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
+    base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+    base.params="${DOWNLOADED_PATH}/params.json" \
+    base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+    base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+    base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+    model.use_kv_cache=true \
+    model.use_sdpa_with_kv_cache=true \
+    model.dtype_override="fp32" \
+    backend.xnnpack.enabled=true \
+    backend.xnnpack.extended_ops=true \
+    export.output_name="${EXPORTED_MODEL_NAME}"
+
+# Build llama runner.
+cmake_install_executorch_libraries
+cmake_build_llama_runner
+
+PROMPT="What happens if you eat watermelon seeds?"
+# Run llama runner.
+RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
+
+NOW=$(date +"%H:%M:%S")
+echo "Starting to run llama runner at ${NOW}"
+# shellcheck source=/dev/null
+cmake-out/examples/models/llama/llama_main --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
+NOW=$(date +"%H:%M:%S")
+echo "Finished at ${NOW}"
+
+RESULT=$(cat result.txt)
+EXPECTED_PREFIX="What happens if you eat watermelon seeds? 
+Watermelon seeds are a good source of vitamin C,"
+
+if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
+    echo "Expected result prefix: ${EXPECTED_PREFIX}"
+    echo "Actual result: ${RESULT}"
+    echo "Success"
+    cleanup_files
+else
+    echo "Expected result prefix: ${EXPECTED_PREFIX}"
+    echo "Actual result: ${RESULT}"
+    echo "Failure; results not the same"
+
+    cleanup_files
+    exit 1
+fi
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index b697b4166e0..47166721cf0 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -687,6 +687,36 @@ jobs:
       # run llama runner in eager mode
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh

+  test-llama-lora-linux:
+    name: test-llama-lora-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # Install a recent version of torchtune.
+        PYTHON_EXECUTABLE=python python -m pip install torchtune==0.7.0.dev20250730 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+
+        # Export the LoRA model and run the llama runner on it
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh
+
   test-mediatek-models-linux:
     name: test-mediatek-models-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
diff --git a/extension/llm/export/config/llm_config.py b/extension/llm/export/config/llm_config.py
index 3a67bf83dfd..ab14a0b4a49 100644
--- a/extension/llm/export/config/llm_config.py
+++ b/extension/llm/export/config/llm_config.py
@@ -60,7 +60,7 @@ class PreqMode(str, Enum):
 @dataclass
 class BaseConfig:
     """
-    Configurations specific to the model, e.g. whether it’s Qwen3 or Phi-4-mini,
+    Configurations specific to the model, e.g. whether it's Qwen3 or Phi-4-mini,
     and are the minimal set of parameters needed to load the pretrained
     eager model and its weights.

@@ -487,6 +487,10 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
             llm_config.base.checkpoint = args.checkpoint
         if hasattr(args, "checkpoint_dir"):
             llm_config.base.checkpoint_dir = args.checkpoint_dir
+        if hasattr(args, "adapter_checkpoint"):
+            llm_config.base.adapter_checkpoint = args.adapter_checkpoint
+        if hasattr(args, "adapter_config"):
+            llm_config.base.adapter_config = args.adapter_config
         if hasattr(args, "tokenizer_path"):
             llm_config.base.tokenizer_path = args.tokenizer_path
         if hasattr(args, "metadata"):