From 1aabebc9d261db5dfdfc5b61b79a300e05ee89cd Mon Sep 17 00:00:00 2001 From: lucylq Date: Mon, 18 Aug 2025 15:09:48 -0700 Subject: [PATCH 1/3] lora --- .../cpp/lora_example/README.md | 73 +++++++++++++++++++ .../cpp/lora_example/build_example.sh | 0 .../cpp/lora_example/main.cpp | 0 program-data-separation/export_lora.sh | 0 4 files changed, 73 insertions(+) create mode 100644 program-data-separation/cpp/lora_example/README.md create mode 100644 program-data-separation/cpp/lora_example/build_example.sh create mode 100644 program-data-separation/cpp/lora_example/main.cpp create mode 100644 program-data-separation/export_lora.sh diff --git a/program-data-separation/cpp/lora_example/README.md b/program-data-separation/cpp/lora_example/README.md new file mode 100644 index 00000000..c106db79 --- /dev/null +++ b/program-data-separation/cpp/lora_example/README.md @@ -0,0 +1,73 @@ +# ExecuTorch Program Data Separation Demo C++. + +This directory contains the C++ code to run the examples generated in [program-data-separation](../../README.md). + + +## Virtual environment setup. +Create and activate a Python virtual environment: +```bash +python3 -m venv .venv && source .venv/bin/activate && pip install --upgrade pip +``` +Or alternatively, [install conda on your machine](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) +```bash +conda create -yn executorch-ptd python=3.10.0 && conda activate executorch-ptd +``` + +Install dependencies: +```bash +pip install executorch==0.7.0 +``` + +## Export the model/s. + +Change into the program-data-separation directory and create a directory to hold exported artifacts. +```bash +cd ~/executorch-examples/program-data-separation +mkdir models +``` + +Export models into the `models` directory. The first command will generated undelegated model/data files, and the second will generate XNNPACK-delegated model/data files. +```bash +./export_lora.sh +``` +Expect the files `lora.pte` and `lora.ptd`. 
+ +Note: +- PTE: contains the program execution logic. +- PTD: contains the constant tensors used by the PTE. + +See [program-data-separation](../../program-data-separation/README.md) for instructions. + +## Install runtime dependencies. +The ExecuTorch repository is configured as a git submodule at `~/executorch-examples/program-data-separation/cpp/executorch`. To initialize it: +```bash +cd ~/executorch-examples/ +git submodule sync +git submodule update --init --recursive +``` +Install dev requirements for ExecuTorch + +```bash +cd ~/executorch-examples/program-data-separation/cpp/executorch +pip install -r requirements-dev.txt +``` + +## Build the runtime. +Build the executable: +```bash +cd ~/executorch-examples/program-data-separation/cpp/lora_example +chmod +x build_example.sh +./build_example.sh +``` + +## Run the executable. +``` +./build/bin/executorch_program_data_separation --model-path ../../models/linear.pte --data-path ../../models/linear.ptd + +./build/bin/executorch_program_data_separation --model-path ../../models/linear_xnnpack.pte --data-path ../../models/linear_xnnpack.ptd +``` + +## Clean up. 
+rm -rf build +cd ~/executorch-examples/program-data-separation +rm -rf models diff --git a/program-data-separation/cpp/lora_example/build_example.sh b/program-data-separation/cpp/lora_example/build_example.sh new file mode 100644 index 00000000..e69de29b diff --git a/program-data-separation/cpp/lora_example/main.cpp b/program-data-separation/cpp/lora_example/main.cpp new file mode 100644 index 00000000..e69de29b diff --git a/program-data-separation/export_lora.sh b/program-data-separation/export_lora.sh new file mode 100644 index 00000000..e69de29b From 9ceef855239ddbc08a93a13fa89b4d0d217d9fe5 Mon Sep 17 00:00:00 2001 From: lucylq Date: Mon, 18 Aug 2025 17:21:46 -0700 Subject: [PATCH 2/3] lora example --- program-data-separation/cpp/CMakeLists.txt | 55 ++++++++--- .../cpp/linear_example/build_example.sh | 2 +- .../cpp/lora_example/README.md | 39 +++++--- .../cpp/lora_example/build_example.sh | 15 +++ .../cpp/lora_example/main.cpp | 92 +++++++++++++++++++ program-data-separation/export_lora.sh | 53 +++++++++++ 6 files changed, 229 insertions(+), 27 deletions(-) diff --git a/program-data-separation/cpp/CMakeLists.txt b/program-data-separation/cpp/CMakeLists.txt index 75045c1f..ac7d9112 100644 --- a/program-data-separation/cpp/CMakeLists.txt +++ b/program-data-separation/cpp/CMakeLists.txt @@ -14,30 +14,59 @@ option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON) option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON) option(EXECUTORCH_BUILD_XNNPACK "" ON) -# Add ExecuTorch subdirectory +# Dependencies required for llm runner in lora demo. +if(EXECUTORCH_BUILD_LORA_DEMO) +option(EXECUTORCH_BUILD_EXTENSION_LLM "" ON) +option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER "" ON) +option(EXECUTORCH_BUILD_KERNELS_LLM "" ON) +option(EXECUTORCH_BUILD_KERNELS_LLM_AOT "" ON) +endif() + +# Add ExecuTorch subdirectory, after setting options. 
add_subdirectory("executorch") -set(DEMO_SOURCES linear_example/main.cpp) +set(LINK_LIBS executorch + executorch::extensions + xnnpack_backend + # NOTE: xnnpack_backend has to go before + # kernels otherwise it doesn't get registered. + executorch::kernels + gflags +) + +# Add sources and dependencies. +set(DEMO_SOURCES "") +if(EXECUTORCH_BUILD_LINEAR_DEMO) + list(APPEND DEMO_SOURCES "linear_example/main.cpp") +endif() +if(EXECUTORCH_BUILD_LORA_DEMO) + list(APPEND DEMO_SOURCES "lora_example/main.cpp") + add_subdirectory("executorch/examples/models/llama/runner") + list(APPEND LINK_LIBS llama_runner) +endif() # Create executable add_executable(executorch_program_data_separation ${DEMO_SOURCES}) -# Include directories -target_include_directories(executorch_program_data_separation PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - # Link libraries target_link_libraries( executorch_program_data_separation - PRIVATE executorch - extension_module_static - extension_flat_tensor - extension_tensor - xnnpack_backend - portable_ops_lib - portable_kernels - gflags + PRIVATE ${LINK_LIBS} ) +# Include directories for lora demo. +if(EXECUTORCH_BUILD_LORA_DEMO) + # Include directories + target_include_directories(executorch_program_data_separation PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/executorch/extension/llm/tokenizers/include + ) + target_link_libraries( + executorch_program_data_separation + PUBLIC tokenizers::tokenizers + ) +endif() + # Set output directory set_target_properties(executorch_program_data_separation PROPERTIES diff --git a/program-data-separation/cpp/linear_example/build_example.sh b/program-data-separation/cpp/linear_example/build_example.sh index f94258ae..ce622cf8 100755 --- a/program-data-separation/cpp/linear_example/build_example.sh +++ b/program-data-separation/cpp/linear_example/build_example.sh @@ -7,7 +7,7 @@ mkdir -p build cd build # Configure CMake -cmake -DCMAKE_BUILD_TYPE=Release ../.. 
+cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LINEAR_DEMO=True ../.. # Build the project cmake --build . -j$(nproc) diff --git a/program-data-separation/cpp/lora_example/README.md b/program-data-separation/cpp/lora_example/README.md index c106db79..9f89f03e 100644 --- a/program-data-separation/cpp/lora_example/README.md +++ b/program-data-separation/cpp/lora_example/README.md @@ -14,12 +14,16 @@ conda create -yn executorch-ptd python=3.10.0 && conda activate executorch-ptd ``` Install dependencies: -```bash -pip install executorch==0.7.0 +LoRA isn't available in the 0.7.0 release of ExecuTorch. Instead, please install from source until ExecuTorch 1.0 is released. + +[Install ExecuTorch pip package from source](https://docs.pytorch.org/executorch/stable/using-executorch-building-from-source.html#install-executorch-pip-package-from-source). + +Currently, the LoRA changes aren't in nightlies. Once they are in, you can also install from the nightly build. +``` +pip install executorch==0.8.0.devYYYYMMDD --extra-index-url https://download.pytorch.org/whl/nightly/cpu ``` ## Export the model/s. - Change into the program-data-separation directory and create a directory to hold exported artifacts. ```bash cd ~/executorch-examples/program-data-separation @@ -28,16 +32,22 @@ mkdir models Export models into the `models` directory. The first command will generated undelegated model/data files, and the second will generate XNNPACK-delegated model/data files. ```bash -./export_lora.sh +sh export_lora.sh ``` -Expect the files `lora.pte` and `lora.ptd`. +Expect the files: +- llama_3_2_1B.pte +- llama_3_2_1B.ptd +- llama_3_2_1B_lora.pte +- foundation.ptd +- tokenizer.model + +llama_3_2_1B.ptd and foundation.ptd contain the same contents, and you can remove llama_3_2_1B.ptd. +tokenizer.model is copied from the temp directory where we downloaded the HF artifacts. It will be used at runtime. Note: - PTE: contains the program execution logic. 
- PTD: contains the constant tensors used by the PTE. -See [program-data-separation](../../program-data-separation/README.md) for instructions. - ## Install runtime dependencies. The ExecuTorch repository is configured as a git submodule at `~/executorch-examples/program-data-separation/cpp/executorch`. To initialize it: ```bash @@ -53,21 +63,24 @@ pip install -r requirements-dev.txt ``` ## Build the runtime. +Install some dependencies: +```bash +cd ~/executorch-examples/program-data-separation/cpp/executorch +sh examples/models/llama/install_requirements.sh +``` + Build the executable: ```bash cd ~/executorch-examples/program-data-separation/cpp/lora_example -chmod +x build_example.sh -./build_example.sh +sh build_example.sh ``` ## Run the executable. ``` -./build/bin/executorch_program_data_separation --model-path ../../models/linear.pte --data-path ../../models/linear.ptd - -./build/bin/executorch_program_data_separation --model-path ../../models/linear_xnnpack.pte --data-path ../../models/linear_xnnpack.ptd +./build/bin/executorch_program_data_separation --lora_model_path=../../llama_3_2_1B_lora.pte --llama_model_path=../../llama_3_2_1B.pte --tokenizer_path=../../tokenizer.model --data_path=../../foundation.ptd ``` ## Clean up. rm -rf build cd ~/executorch-examples/program-data-separation -rm -rf models +rm -rf *.pte *.ptd tokenizer.model diff --git a/program-data-separation/cpp/lora_example/build_example.sh b/program-data-separation/cpp/lora_example/build_example.sh index e69de29b..6f63e825 100644 --- a/program-data-separation/cpp/lora_example/build_example.sh +++ b/program-data-separation/cpp/lora_example/build_example.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +# Clean and create build directory if it doesn't exist +rm -rf build +mkdir -p build +cd build + +# Configure CMake +cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LORA_DEMO=True ../.. + +# Build the project +cmake --build . -j$(nproc) + +echo "Build complete! 
Executable located at: ./build/bin/executorch_program_data_separation" diff --git a/program-data-separation/cpp/lora_example/main.cpp b/program-data-separation/cpp/lora_example/main.cpp index e69de29b..25aca0d3 100644 --- a/program-data-separation/cpp/lora_example/main.cpp +++ b/program-data-separation/cpp/lora_example/main.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + * @lint-ignore-every CLANGTIDY facebook-hte-Deprecated + */ +#include + +#include + +#if defined(ET_USE_THREADPOOL) +#include +#include +#endif + +DEFINE_string(lora_model_path, "llama_3_2_1B_lora.pte", + "LoRA model serialized in flatbuffer format."); +DEFINE_string(llama_model_path, "llama_3_2_1B.pte", + "Model serialized in flatbuffer format."); +DEFINE_string(data_path, "foundation.ptd", + "Data serialized in flatbuffer format."); + +DEFINE_string(tokenizer_path, "tokenizer.model", "Tokenizer stuff."); + +DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt."); + +DEFINE_double(temperature, 0, + "Temperature; Default is 0. 0 = greedy argmax sampling " + "(deterministic). Lower temperature = more deterministic"); + +DEFINE_int32( + seq_len, 128, + "Total number of tokens to generate (prompt + output). Defaults to " + "max_seq_len. 
If the number of input tokens + seq_len > max_seq_len, the " + "output will be truncated to max_seq_len tokens."); + +using namespace ::executorch::extension; + +int main(int argc, char *argv[]) { + ET_LOG(Info, "Running program-data separation lora example..."); + + gflags::ParseCommandLineFlags(&argc, &argv, true); + + const char *lora_model_path = FLAGS_lora_model_path.c_str(); + const char *llama_model_path = FLAGS_llama_model_path.c_str(); + const char *data_path = FLAGS_data_path.c_str(); + + const char *tokenizer_path = FLAGS_tokenizer_path.c_str(); + const char *prompt = FLAGS_prompt.c_str(); + float temperature = FLAGS_temperature; + int32_t seq_len = 128; + int32_t cpu_threads = -1; + + // Create runner for lora model. + std::unique_ptr<::executorch::extension::llm::TextLLMRunner> lora_runner = + example::create_llama_runner(lora_model_path, tokenizer_path, data_path); + if (lora_runner == nullptr) { + ET_LOG(Error, "Failed to create lora_runner."); + return 1; + } + + // create runner for llama model + std::unique_ptr<::executorch::extension::llm::TextLLMRunner> llama_runner = + example::create_llama_runner(llama_model_path, tokenizer_path, data_path); + if (llama_runner == nullptr) { + ET_LOG(Error, "Failed to create llama_runner."); + return 1; + } + + // generate + executorch::extension::llm::GenerationConfig config{ + .seq_len = seq_len, .temperature = temperature}; + + auto error = lora_runner->generate(prompt, config); + if (error != executorch::runtime::Error::Ok) { + ET_LOG(Error, "Failed to generate with lora_runner, error code %zu.", + error); + return 1; + } + + ET_LOG(Info, "Generating with llama..."); + error = llama_runner->generate(prompt, config); + if (error != executorch::runtime::Error::Ok) { + ET_LOG(Error, "Failed to generate with llama_runner, error code %zu.", + error); + return 1; + } + + return 0; +} diff --git a/program-data-separation/export_lora.sh b/program-data-separation/export_lora.sh index e69de29b..082de33b 100644 --- 
a/program-data-separation/export_lora.sh +++ b/program-data-separation/export_lora.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set -exu + +python -m pip install torchtune==0.7.0.dev20250730 --extra-index-url https://download.pytorch.org/whl/nightly/cpu + +# Download model artifacts from HF. +DOWNLOADED_PATH=$(python -c " +from huggingface_hub import snapshot_download +path=snapshot_download( + repo_id=\"lucylq/llama3_1B_lora\", +) +import os +print(path) +") + +# Copy over tokenizer, for use at runtime. +cp "${DOWNLOADED_PATH}/tokenizer.model" . + +# Export a non-LoRA model with program-data separated. +MODEL="llama_3_2_1B" +python -m executorch.extension.llm.export.export_llm \ + base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ + base.params="${DOWNLOADED_PATH}/params.json" \ + base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \ + model.use_kv_cache=true \ + model.use_sdpa_with_kv_cache=true \ + model.dtype_override="fp32" \ + backend.xnnpack.enabled=true \ + backend.xnnpack.extended_ops=true \ + export.output_name="${MODEL}.pte" \ + export.foundation_weights_file="${MODEL}.ptd" + +# Export a LoRA model, with program and data separated. 
+LORA_MODEL="llama_3_2_1B_lora" +python -m executorch.extension.llm.export.export_llm \ + base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \ + base.params="${DOWNLOADED_PATH}/params.json" \ + base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \ + base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \ + base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \ + model.use_kv_cache=true \ + model.use_sdpa_with_kv_cache=true \ + model.dtype_override="fp32" \ + backend.xnnpack.enabled=true \ + backend.xnnpack.extended_ops=true \ + export.output_name="${LORA_MODEL}.pte" \ + export.foundation_weights_file="foundation.ptd" From f2f06c1ebc34aec8c3b32098a1b40ef265a87232 Mon Sep 17 00:00:00 2001 From: lucylq Date: Wed, 20 Aug 2025 15:35:12 -0700 Subject: [PATCH 3/3] weight sharing --- program-data-separation/cpp/CMakeLists.txt | 2 - .../cpp/lora_example/README.md | 4 +- .../cpp/lora_example/build_example.sh | 2 +- .../cpp/lora_example/main.cpp | 84 +++++++++++++------ 4 files changed, 64 insertions(+), 28 deletions(-) diff --git a/program-data-separation/cpp/CMakeLists.txt b/program-data-separation/cpp/CMakeLists.txt index ac7d9112..44e83a9e 100644 --- a/program-data-separation/cpp/CMakeLists.txt +++ b/program-data-separation/cpp/CMakeLists.txt @@ -41,8 +41,6 @@ if(EXECUTORCH_BUILD_LINEAR_DEMO) endif() if(EXECUTORCH_BUILD_LORA_DEMO) list(APPEND DEMO_SOURCES "lora_example/main.cpp") - add_subdirectory("executorch/examples/models/llama/runner") - list(APPEND LINK_LIBS llama_runner) endif() # Create executable diff --git a/program-data-separation/cpp/lora_example/README.md b/program-data-separation/cpp/lora_example/README.md index 9f89f03e..44f158c0 100644 --- a/program-data-separation/cpp/lora_example/README.md +++ b/program-data-separation/cpp/lora_example/README.md @@ -76,7 +76,9 @@ sh build_example.sh ``` ## Run the executable. 
-``` +```bash +cd ~/executorch-examples/program-data-separation/cpp/lora_example + ./build/bin/executorch_program_data_separation --lora_model_path=../../llama_3_2_1B_lora.pte --llama_model_path=../../llama_3_2_1B.pte --tokenizer_path=../../tokenizer.model --data_path=../../foundation.ptd ``` diff --git a/program-data-separation/cpp/lora_example/build_example.sh b/program-data-separation/cpp/lora_example/build_example.sh index 6f63e825..0b4d194a 100644 --- a/program-data-separation/cpp/lora_example/build_example.sh +++ b/program-data-separation/cpp/lora_example/build_example.sh @@ -7,7 +7,7 @@ mkdir -p build cd build # Configure CMake -cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LORA_DEMO=True ../.. +cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LORA_DEMO=True -DEXECUTORCH_XNNPACK_ENABLE_WEIGHT_CACHE=True ../.. # Build the project cmake --build . -j$(nproc) diff --git a/program-data-separation/cpp/lora_example/main.cpp b/program-data-separation/cpp/lora_example/main.cpp index 25aca0d3..ab33d958 100644 --- a/program-data-separation/cpp/lora_example/main.cpp +++ b/program-data-separation/cpp/lora_example/main.cpp @@ -6,9 +6,18 @@ * LICENSE file in the root directory of this source tree. * @lint-ignore-every CLANGTIDY facebook-hte-Deprecated */ + +#include +#include +#include + #include -#include +#include +#include +#include +#include +#include #if defined(ET_USE_THREADPOOL) #include @@ -36,7 +45,30 @@ DEFINE_int32( "max_seq_len. 
If the number of input tokens + seq_len > max_seq_len, the " "output will be truncated to max_seq_len tokens."); -using namespace ::executorch::extension; +using executorch::extension::Module; +using executorch::runtime::Error; +namespace llm = executorch::extension::llm; + +namespace { +static constexpr int32_t kSpecialTokensSize = 256; +static inline std::unique_ptr> +_get_default_special_tokens() { + auto special_tokens = + std::make_unique>(std::vector{ + "<|begin_of_text|>", "<|end_of_text|>", + "<|reserved_special_token_0|>", "<|reserved_special_token_1|>", + "<|finetune_right_pad_id|>", "<|step_id|>", "<|start_header_id|>", + "<|end_header_id|>", "<|eom_id|>", "<|eot_id|>", "<|python_tag|>"}); + // pad the rest of the special tokens with reserved tokens + ssize_t reserved_special_token_num = 2; + while (special_tokens->size() < kSpecialTokensSize) { + special_tokens->emplace_back("<|reserved_special_token_" + + std::to_string(reserved_special_token_num++) + + "|>"); + } + return special_tokens; +} +} // namespace int main(int argc, char *argv[]) { ET_LOG(Info, "Running program-data separation lora example..."); @@ -53,37 +85,41 @@ int main(int argc, char *argv[]) { int32_t seq_len = 128; int32_t cpu_threads = -1; - // Create runner for lora model. - std::unique_ptr<::executorch::extension::llm::TextLLMRunner> lora_runner = - example::create_llama_runner(lora_model_path, tokenizer_path, data_path); - if (lora_runner == nullptr) { - ET_LOG(Error, "Failed to create lora_runner."); + // Create tokenizers. 
+ std::unique_ptr tokenizer1 = + llm::load_tokenizer(tokenizer_path, _get_default_special_tokens()); + std::unique_ptr tokenizer2 = + llm::load_tokenizer(tokenizer_path, _get_default_special_tokens()); + + if (tokenizer1 == nullptr || tokenizer2 == nullptr) { + ET_LOG(Info, + "Failed to load %s as a Tiktoken, Sentencepiece or Llama2.c " + "tokenizer, make sure the artifact is one of these types", + tokenizer_path); return 1; } - // create runner for llama model - std::unique_ptr<::executorch::extension::llm::TextLLMRunner> llama_runner = - example::create_llama_runner(llama_model_path, tokenizer_path, data_path); - if (llama_runner == nullptr) { - ET_LOG(Error, "Failed to create llama_runner."); - return 1; - } + // Create runners. + std::unique_ptr llama_runner = + llm::create_text_llm_runner(llama_model_path, std::move(tokenizer1), + data_path, temperature); + std::unique_ptr lora_runner = llm::create_text_llm_runner( + lora_model_path, std::move(tokenizer2), data_path, temperature); - // generate - executorch::extension::llm::GenerationConfig config{ - .seq_len = seq_len, .temperature = temperature}; + // Generate. + llm::GenerationConfig config{.seq_len = seq_len, .temperature = temperature}; - auto error = lora_runner->generate(prompt, config); - if (error != executorch::runtime::Error::Ok) { - ET_LOG(Error, "Failed to generate with lora_runner, error code %zu.", + ET_LOG(Info, "Generating with llama..."); + auto error = llama_runner->generate(prompt, config); + if (error != Error::Ok) { + ET_LOG(Error, "Failed to generate with llama_runner, error code %zu.", error); return 1; } - ET_LOG(Info, "Generating with llama..."); - error = llama_runner->generate(prompt, config); - if (error != executorch::runtime::Error::Ok) { - ET_LOG(Error, "Failed to generate with llama_runner, error code %zu.", + error = lora_runner->generate(prompt, config); + if (error != Error::Ok) { + ET_LOG(Error, "Failed to generate with lora_runner, error code %zu.", error); return 1; }