Skip to content

Commit 1a01524

Browse files
committed
lora example
1 parent fde625f commit 1a01524

File tree

6 files changed

+229
-27
lines changed

6 files changed

+229
-27
lines changed

program-data-separation/cpp/CMakeLists.txt

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,30 +14,59 @@ option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
1414
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
1515
option(EXECUTORCH_BUILD_XNNPACK "" ON)
1616

17-
# Add ExecuTorch subdirectory
17+
# Dependencies required for llm runner in lora demo.
18+
if(EXECUTORCH_BUILD_LORA_DEMO)
19+
option(EXECUTORCH_BUILD_EXTENSION_LLM "" ON)
20+
option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER "" ON)
21+
option(EXECUTORCH_BUILD_KERNELS_LLM "" ON)
22+
option(EXECUTORCH_BUILD_KERNELS_LLM_AOT "" ON)
23+
endif()
24+
25+
# Add ExecuTorch subdirectory, after setting options.
1826
add_subdirectory("executorch")
1927

20-
set(DEMO_SOURCES linear_example/main.cpp)
28+
set(LINK_LIBS executorch
29+
executorch::extensions
30+
xnnpack_backend
31+
# NOTE: xnnpack_backend has to go before
32+
# kernels otherwise it doesn't get registered.
33+
executorch::kernels
34+
gflags
35+
)
36+
37+
# Add sources and dependencies.
38+
set(DEMO_SOURCES "")
39+
if(EXECUTORCH_BUILD_LINEAR_DEMO)
40+
list(APPEND DEMO_SOURCES "linear_example/main.cpp")
41+
endif()
42+
if(EXECUTORCH_BUILD_LORA_DEMO)
43+
list(APPEND DEMO_SOURCES "lora_example/main.cpp")
44+
add_subdirectory("executorch/examples/models/llama/runner")
45+
list(APPEND LINK_LIBS llama_runner)
46+
endif()
2147

2248
# Create executable
2349
add_executable(executorch_program_data_separation ${DEMO_SOURCES})
2450

25-
# Include directories
26-
target_include_directories(executorch_program_data_separation PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
27-
2851
# Link libraries
2952
target_link_libraries(
3053
executorch_program_data_separation
31-
PRIVATE executorch
32-
extension_module_static
33-
extension_flat_tensor
34-
extension_tensor
35-
xnnpack_backend
36-
portable_ops_lib
37-
portable_kernels
38-
gflags
54+
PRIVATE ${LINK_LIBS}
3955
)
4056

57+
# Include directories for lora demo.
58+
if(EXECUTORCH_BUILD_LORA_DEMO)
59+
# Include directories
60+
target_include_directories(executorch_program_data_separation PRIVATE
61+
${CMAKE_CURRENT_SOURCE_DIR}
62+
${CMAKE_CURRENT_SOURCE_DIR}/executorch/extension/llm/tokenizers/include
63+
)
64+
target_link_libraries(
65+
executorch_program_data_separation
66+
PUBLIC tokenizers::tokenizers
67+
)
68+
endif()
69+
4170
# Set output directory
4271
set_target_properties(executorch_program_data_separation
4372
PROPERTIES

program-data-separation/cpp/linear_example/build_example.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ mkdir -p build
77
cd build
88

99
# Configure CMake
10-
cmake -DCMAKE_BUILD_TYPE=Release ../..
10+
cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LINEAR_DEMO=True ../..
1111

1212
# Build the project
1313
cmake --build . -j$(nproc)

program-data-separation/cpp/lora_example/README.md

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,16 @@ conda create -yn executorch-ptd python=3.10.0 && conda activate executorch-ptd
1414
```
1515

1616
Install dependencies:
17-
```bash
18-
pip install executorch==0.7.0
17+
LoRA isn't available in the 0.7.0 release of ExecuTorch. Instead, please install from source until ExecuTorch 1.0 is released.
18+
19+
[Install ExecuTorch pip package from source](https://docs.pytorch.org/executorch/stable/using-executorch-building-from-source.html#install-executorch-pip-package-from-source).
20+
21+
Currently, the LoRA changes aren't in nightlies. Once they are in, you can also install from the nightly build.
22+
```
23+
pip install executorch==0.8.0.devYYYYMMDD --extra-index-url https://download.pytorch.org/whl/nightly/cpu
1924
```
2025

2126
## Export the model/s.
22-
2327
Change into the program-data-separation directory and create a directory to hold exported artifacts.
2428
```bash
2529
cd ~/executorch-examples/program-data-separation
@@ -28,16 +32,22 @@ mkdir models
2832

2933
Export models into the `models` directory. The first command will generate undelegated model/data files, and the second will generate XNNPACK-delegated model/data files.
3034
```bash
31-
./export_lora.sh
35+
sh export_lora.sh
3236
```
33-
Expect the files `lora.pte` and `lora.ptd`.
37+
Expect the files:
38+
- llama_3_2_1B.pte
39+
- llama_3_2_1B.ptd
40+
- llama_3_2_1B_lora.pte
41+
- foundation.ptd
42+
- tokenizer.model
43+
44+
llama_3_2_1B.ptd and foundation.ptd contain the same contents, and you can remove llama_3_2_1B.ptd.
45+
tokenizer.model is copied from the temp directory where we downloaded the HF artifacts. It will be used at runtime.
3446

3547
Note:
3648
- PTE: contains the program execution logic.
3749
- PTD: contains the constant tensors used by the PTE.
3850

39-
See [program-data-separation](../../program-data-separation/README.md) for instructions.
40-
4151
## Install runtime dependencies.
4252
The ExecuTorch repository is configured as a git submodule at `~/executorch-examples/program-data-separation/cpp/executorch`. To initialize it:
4353
```bash
@@ -53,21 +63,24 @@ pip install -r requirements-dev.txt
5363
```
5464

5565
## Build the runtime.
66+
Install some dependencies:
67+
```bash
68+
cd ~/executorch-examples/program-data-separation/cpp/executorch
69+
sh examples/models/llama/install_requirements.sh
70+
```
71+
5672
Build the executable:
5773
```bash
5874
cd ~/executorch-examples/program-data-separation/cpp/lora_example
59-
chmod +x build_example.sh
60-
./build_example.sh
75+
sh build_example.sh
6176
```
6277

6378
## Run the executable.
6479
```
65-
./build/bin/executorch_program_data_separation --model-path ../../models/linear.pte --data-path ../../models/linear.ptd
66-
67-
./build/bin/executorch_program_data_separation --model-path ../../models/linear_xnnpack.pte --data-path ../../models/linear_xnnpack.ptd
80+
./build/bin/executorch_program_data_separation --lora_model_path=../../llama_3_2_1B_lora.pte --llama_model_path=../../llama_3_2_1B.pte --tokenizer_path=../../tokenizer.model --data_path=../../foundation.ptd
6881
```
6982

7083
## Clean up.
7184
rm -rf build
7285
cd ~/executorch-examples/program-data-separation
73-
rm -rf models
86+
rm -rf *.pte *.ptd tokenizer.model
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash
set -e

# Clean and create build directory if it doesn't exist
rm -rf build
mkdir -p build
cd build

# Configure CMake for the LoRA demo variant of the example.
cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LORA_DEMO=True ../..

# Build the project.
# Fix: nproc is Linux-only; fall back to sysctl on macOS, then to 1.
jobs="$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)"
cmake --build . -j"${jobs}"

echo "Build complete! Executable located at: ./build/bin/executorch_program_data_separation"
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
* @lint-ignore-every CLANGTIDY facebook-hte-Deprecated
8+
*/
9+
#include <gflags/gflags.h>
10+
11+
#include <executorch/examples/models/llama/runner/runner.h>
12+
13+
#if defined(ET_USE_THREADPOOL)
14+
#include <executorch/extension/threadpool/cpuinfo_utils.h>
15+
#include <executorch/extension/threadpool/threadpool.h>
16+
#endif
17+
18+
DEFINE_string(lora_model_path, "llama_3_2_1B_lora.pte",
19+
"LoRA model serialized in flatbuffer format.");
20+
DEFINE_string(llama_model_path, "llama_3_2_1B.pte",
21+
"Model serialized in flatbuffer format.");
22+
DEFINE_string(data_path, "foundation.ptd",
23+
"Data serialized in flatbuffer format.");
24+
25+
DEFINE_string(tokenizer_path, "tokenizer.model", "Tokenizer stuff.");
26+
27+
DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt.");
28+
29+
DEFINE_double(temperature, 0,
30+
"Temperature; Default is 0. 0 = greedy argmax sampling "
31+
"(deterministic). Lower temperature = more deterministic");
32+
33+
DEFINE_int32(
34+
seq_len, 128,
35+
"Total number of tokens to generate (prompt + output). Defaults to "
36+
"max_seq_len. If the number of input tokens + seq_len > max_seq_len, the "
37+
"output will be truncated to max_seq_len tokens.");
38+
39+
using namespace ::executorch::extension;
40+
41+
int main(int argc, char *argv[]) {
42+
ET_LOG(Info, "Running program-data separation lora example...");
43+
44+
gflags::ParseCommandLineFlags(&argc, &argv, true);
45+
46+
const char *lora_model_path = FLAGS_lora_model_path.c_str();
47+
const char *llama_model_path = FLAGS_llama_model_path.c_str();
48+
const char *data_path = FLAGS_data_path.c_str();
49+
50+
const char *tokenizer_path = FLAGS_tokenizer_path.c_str();
51+
const char *prompt = FLAGS_prompt.c_str();
52+
float temperature = FLAGS_temperature;
53+
int32_t seq_len = 128;
54+
int32_t cpu_threads = -1;
55+
56+
// Create runner for lora model.
57+
std::unique_ptr<::executorch::extension::llm::TextLLMRunner> lora_runner =
58+
example::create_llama_runner(lora_model_path, tokenizer_path, data_path);
59+
if (lora_runner == nullptr) {
60+
ET_LOG(Error, "Failed to create lora_runner.");
61+
return 1;
62+
}
63+
64+
// create runner for llama model
65+
std::unique_ptr<::executorch::extension::llm::TextLLMRunner> llama_runner =
66+
example::create_llama_runner(llama_model_path, tokenizer_path, data_path);
67+
if (llama_runner == nullptr) {
68+
ET_LOG(Error, "Failed to create llama_runner.");
69+
return 1;
70+
}
71+
72+
// generate
73+
executorch::extension::llm::GenerationConfig config{
74+
.seq_len = seq_len, .temperature = temperature};
75+
76+
auto error = lora_runner->generate(prompt, config);
77+
if (error != executorch::runtime::Error::Ok) {
78+
ET_LOG(Error, "Failed to generate with lora_runner, error code %zu.",
79+
error);
80+
return 1;
81+
}
82+
83+
ET_LOG(Info, "Generating with llama...");
84+
error = llama_runner->generate(prompt, config);
85+
if (error != executorch::runtime::Error::Ok) {
86+
ET_LOG(Error, "Failed to generate with llama_runner, error code %zu.",
87+
error);
88+
return 1;
89+
}
90+
91+
return 0;
92+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

# torchtune nightly provides the LoRA export path used by export_llm below.
python -m pip install torchtune==0.7.0.dev20250730 --extra-index-url https://download.pytorch.org/whl/nightly/cpu

# Download model artifacts from HF.
# Fix: dropped the unused `import os` from the inline Python snippet.
DOWNLOADED_PATH=$(python -c "
from huggingface_hub import snapshot_download
path=snapshot_download(
    repo_id=\"lucylq/llama3_1B_lora\",
)
print(path)
")

# Copy over tokenizer, for use at runtime.
cp "${DOWNLOADED_PATH}/tokenizer.model" .

# Export a non-LoRA model with program-data separated.
MODEL="llama_3_2_1B"
python -m executorch.extension.llm.export.export_llm \
    base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
    base.params="${DOWNLOADED_PATH}/params.json" \
    base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
    model.use_kv_cache=true \
    model.use_sdpa_with_kv_cache=true \
    model.dtype_override="fp32" \
    backend.xnnpack.enabled=true \
    backend.xnnpack.extended_ops=true \
    export.output_name="${MODEL}.pte" \
    export.foundation_weights_file="${MODEL}.ptd"

# Export a LoRA model, with program and data separated.
LORA_MODEL="llama_3_2_1B_lora"
python -m executorch.extension.llm.export.export_llm \
    base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
    base.params="${DOWNLOADED_PATH}/params.json" \
    base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
    base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
    base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
    model.use_kv_cache=true \
    model.use_sdpa_with_kv_cache=true \
    model.dtype_override="fp32" \
    backend.xnnpack.enabled=true \
    backend.xnnpack.extended_ops=true \
    export.output_name="${LORA_MODEL}.pte" \
    export.foundation_weights_file="foundation.ptd"

0 commit comments

Comments
 (0)