From 1aabebc9d261db5dfdfc5b61b79a300e05ee89cd Mon Sep 17 00:00:00 2001
From: lucylq <lfq@meta.com>
Date: Mon, 18 Aug 2025 15:09:48 -0700
Subject: [PATCH 1/3] lora

---
 .../cpp/lora_example/README.md                | 73 +++++++++++++++++++
 .../cpp/lora_example/build_example.sh         |  0
 .../cpp/lora_example/main.cpp                 |  0
 program-data-separation/export_lora.sh        |  0
 4 files changed, 73 insertions(+)
 create mode 100644 program-data-separation/cpp/lora_example/README.md
 create mode 100644 program-data-separation/cpp/lora_example/build_example.sh
 create mode 100644 program-data-separation/cpp/lora_example/main.cpp
 create mode 100644 program-data-separation/export_lora.sh

diff --git a/program-data-separation/cpp/lora_example/README.md b/program-data-separation/cpp/lora_example/README.md
new file mode 100644
index 00000000..c106db79
--- /dev/null
+++ b/program-data-separation/cpp/lora_example/README.md
@@ -0,0 +1,73 @@
+# ExecuTorch Program Data Separation Demo C++.
+
+This directory contains the C++ code to run the examples generated in [program-data-separation](../../README.md).
+
+
+## Virtual environment setup.
+Create and activate a Python virtual environment:
+```bash
+python3 -m venv .venv && source .venv/bin/activate && pip install --upgrade pip
+```
+Or alternatively, [install conda on your machine](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)
+```bash
+conda create -yn executorch-ptd python=3.10.0 && conda activate executorch-ptd
+```
+
+Install dependencies:
+```bash
+pip install executorch==0.7.0
+```
+
+## Export the model/s.
+
+Change into the program-data-separation directory and create a directory to hold exported artifacts.
+```bash
+cd ~/executorch-examples/program-data-separation
+mkdir models
+```
+
+Export models into the `models` directory. The first command will generated undelegated model/data files, and the second will generate XNNPACK-delegated model/data files.
+```bash
+./export_lora.sh
+```
+Expect the files `lora.pte` and `lora.ptd`.
+
+Note:
+- PTE: contains the program execution logic.
+- PTD: contains the constant tensors used by the PTE.
+
+See [program-data-separation](../../program-data-separation/README.md) for instructions.
+
+## Install runtime dependencies.
+The ExecuTorch repository is configured as a git submodule at `~/executorch-examples/program-data-separation/cpp/executorch`.  To initialize it:
+```bash
+cd ~/executorch-examples/
+git submodule sync
+git submodule update --init --recursive
+```
+Install dev requirements for ExecuTorch
+
+```bash
+cd ~/executorch-examples/program-data-separation/cpp/executorch
+pip install -r requirements-dev.txt
+```
+
+## Build the runtime.
+Build the executable:
+```bash
+cd ~/executorch-examples/program-data-separation/cpp/lora_example
+chmod +x build_example.sh
+./build_example.sh
+```
+
+## Run the executable.
+```
+./build/bin/executorch_program_data_separation --model-path ../../models/linear.pte --data-path ../../models/linear.ptd
+
+./build/bin/executorch_program_data_separation --model-path ../../models/linear_xnnpack.pte --data-path ../../models/linear_xnnpack.ptd
+```
+
+## Clean up.
+rm -rf build
+cd ~/executorch-examples/program-data-separation
+rm -rf models
diff --git a/program-data-separation/cpp/lora_example/build_example.sh b/program-data-separation/cpp/lora_example/build_example.sh
new file mode 100644
index 00000000..e69de29b
diff --git a/program-data-separation/cpp/lora_example/main.cpp b/program-data-separation/cpp/lora_example/main.cpp
new file mode 100644
index 00000000..e69de29b
diff --git a/program-data-separation/export_lora.sh b/program-data-separation/export_lora.sh
new file mode 100644
index 00000000..e69de29b

From 9ceef855239ddbc08a93a13fa89b4d0d217d9fe5 Mon Sep 17 00:00:00 2001
From: lucylq <lfq@meta.com>
Date: Mon, 18 Aug 2025 17:21:46 -0700
Subject: [PATCH 2/3] lora example

---
 program-data-separation/cpp/CMakeLists.txt    | 55 ++++++++---
 .../cpp/linear_example/build_example.sh       |  2 +-
 .../cpp/lora_example/README.md                | 39 +++++---
 .../cpp/lora_example/build_example.sh         | 15 +++
 .../cpp/lora_example/main.cpp                 | 92 +++++++++++++++++++
 program-data-separation/export_lora.sh        | 53 +++++++++++
 6 files changed, 229 insertions(+), 27 deletions(-)

diff --git a/program-data-separation/cpp/CMakeLists.txt b/program-data-separation/cpp/CMakeLists.txt
index 75045c1f..ac7d9112 100644
--- a/program-data-separation/cpp/CMakeLists.txt
+++ b/program-data-separation/cpp/CMakeLists.txt
@@ -14,30 +14,73 @@ option(EXECUTORCH_BUILD_EXTENSION_TENSOR "" ON)
 option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
 option(EXECUTORCH_BUILD_XNNPACK "" ON)
 
-# Add ExecuTorch subdirectory
+# Demo selection. Exactly one demo must be enabled: each demo contributes its
+# own main() to the single executorch_program_data_separation executable.
+option(EXECUTORCH_BUILD_LINEAR_DEMO "Build linear_example/main.cpp" OFF)
+option(EXECUTORCH_BUILD_LORA_DEMO "Build lora_example/main.cpp" OFF)
+
+# Dependencies required for llm runner in lora demo.
+if(EXECUTORCH_BUILD_LORA_DEMO)
+  option(EXECUTORCH_BUILD_EXTENSION_LLM "" ON)
+  option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER "" ON)
+  option(EXECUTORCH_BUILD_KERNELS_LLM "" ON)
+  option(EXECUTORCH_BUILD_KERNELS_LLM_AOT "" ON)
+endif()
+
+# Add ExecuTorch subdirectory, after setting options.
 add_subdirectory("executorch")
 
-set(DEMO_SOURCES linear_example/main.cpp)
+# Libraries linked into the demo executable (see target_link_libraries below).
+set(LINK_LIBS executorch
+              executorch::extensions
+              xnnpack_backend
+              # NOTE: xnnpack_backend has to go before
+              # kernels otherwise it doesn't get registered.
+              executorch::kernels
+              gflags
+)
+
+# Add sources and dependencies.
+set(DEMO_SOURCES "")
+if(EXECUTORCH_BUILD_LINEAR_DEMO)
+  list(APPEND DEMO_SOURCES "linear_example/main.cpp")
+endif()
+if(EXECUTORCH_BUILD_LORA_DEMO)
+  list(APPEND DEMO_SOURCES "lora_example/main.cpp")
+  add_subdirectory("executorch/examples/models/llama/runner")
+  list(APPEND LINK_LIBS llama_runner)
+endif()
 
 # Create executable
+# Fail at configure time with a clear message unless exactly one demo is on.
+list(LENGTH DEMO_SOURCES demo_source_count)
+if(NOT demo_source_count EQUAL 1)
+  message(FATAL_ERROR
+    "Enable exactly one of EXECUTORCH_BUILD_LINEAR_DEMO or "
+    "EXECUTORCH_BUILD_LORA_DEMO; each demo supplies its own main().")
+endif()
 add_executable(executorch_program_data_separation ${DEMO_SOURCES})
 
-# Include directories
-target_include_directories(executorch_program_data_separation PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-
 # Link libraries
 target_link_libraries(
   executorch_program_data_separation
-  PRIVATE executorch
-          extension_module_static
-          extension_flat_tensor
-          extension_tensor
-          xnnpack_backend
-          portable_ops_lib
-          portable_kernels
-          gflags
+  PRIVATE ${LINK_LIBS}
 )
 
+# Extra include paths and link dependencies used only by the lora demo.
+if(EXECUTORCH_BUILD_LORA_DEMO)
+  # Expose this directory and the tokenizers headers to the demo sources.
+  target_include_directories(executorch_program_data_separation PRIVATE
+      ${CMAKE_CURRENT_SOURCE_DIR}
+      ${CMAKE_CURRENT_SOURCE_DIR}/executorch/extension/llm/tokenizers/include
+  )
+  # PRIVATE: the target is an executable, matching the link call above.
+  target_link_libraries(
+    executorch_program_data_separation
+    PRIVATE tokenizers::tokenizers
+  )
+endif()
+
 # Set output directory
 set_target_properties(executorch_program_data_separation
     PROPERTIES
diff --git a/program-data-separation/cpp/linear_example/build_example.sh b/program-data-separation/cpp/linear_example/build_example.sh
index f94258ae..ce622cf8 100755
--- a/program-data-separation/cpp/linear_example/build_example.sh
+++ b/program-data-separation/cpp/linear_example/build_example.sh
@@ -7,7 +7,7 @@ mkdir -p build
 cd build
 
 # Configure CMake
-cmake -DCMAKE_BUILD_TYPE=Release ../..
+cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LINEAR_DEMO=True  ../..
 
 # Build the project
 cmake --build . -j$(nproc)
diff --git a/program-data-separation/cpp/lora_example/README.md b/program-data-separation/cpp/lora_example/README.md
index c106db79..9f89f03e 100644
--- a/program-data-separation/cpp/lora_example/README.md
+++ b/program-data-separation/cpp/lora_example/README.md
@@ -14,12 +14,16 @@ conda create -yn executorch-ptd python=3.10.0 && conda activate executorch-ptd
 ```
 
 Install dependencies:
-```bash
-pip install executorch==0.7.0
+LoRA isn't available in the 0.7.0 release of ExecuTorch. Instead, please install from source until ExecuTorch 1.0 is released.
+
+[Install ExecuTorch pip package from source](https://docs.pytorch.org/executorch/stable/using-executorch-building-from-source.html#install-executorch-pip-package-from-source).
+
+Currently, the LoRA changes aren't in nightlies. Once they are in, you can also install from the nightly build.
+```
+pip install executorch==0.8.0.devYYYYMMDD --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 ```
 
 ## Export the model/s.
-
 Change into the program-data-separation directory and create a directory to hold exported artifacts.
 ```bash
 cd ~/executorch-examples/program-data-separation
@@ -28,16 +32,22 @@ mkdir models
 
 Export models into the `models` directory. The first command will generated undelegated model/data files, and the second will generate XNNPACK-delegated model/data files.
 ```bash
-./export_lora.sh
+sh export_lora.sh
 ```
-Expect the files `lora.pte` and `lora.ptd`.
+Expect the files:
+- llama_3_2_1B.pte
+- llama_3_2_1B.ptd
+- llama_3_2_1B_lora.pte
+- foundation.ptd
+- tokenizer.model
+
+llama_3_2_1B.ptd and foundation.ptd contain the same contents, and you can remove llama_3_2_1B.ptd.
+tokenizer.model is copied from the temp directory where we downloaded the HF artifacts. It will be used at runtime.
 
 Note:
 - PTE: contains the program execution logic.
 - PTD: contains the constant tensors used by the PTE.
 
-See [program-data-separation](../../program-data-separation/README.md) for instructions.
-
 ## Install runtime dependencies.
 The ExecuTorch repository is configured as a git submodule at `~/executorch-examples/program-data-separation/cpp/executorch`.  To initialize it:
 ```bash
@@ -53,21 +63,24 @@ pip install -r requirements-dev.txt
 ```
 
 ## Build the runtime.
+Install some dependencies:
+```bash
+cd ~/executorch-examples/program-data-separation/cpp/executorch
+sh examples/models/llama/install_requirements.sh
+```
+
 Build the executable:
 ```bash
 cd ~/executorch-examples/program-data-separation/cpp/lora_example
-chmod +x build_example.sh
-./build_example.sh
+sh build_example.sh
 ```
 
 ## Run the executable.
 ```
-./build/bin/executorch_program_data_separation --model-path ../../models/linear.pte --data-path ../../models/linear.ptd
-
-./build/bin/executorch_program_data_separation --model-path ../../models/linear_xnnpack.pte --data-path ../../models/linear_xnnpack.ptd
+./build/bin/executorch_program_data_separation --lora_model_path=../../llama_3_2_1B_lora.pte --llama_model_path=../../llama_3_2_1B.pte --tokenizer_path=../../tokenizer.model --data_path=../../foundation.ptd
 ```
 
 ## Clean up.
 rm -rf build
 cd ~/executorch-examples/program-data-separation
-rm -rf models
+rm -rf *.pte *.ptd tokenizer.model
diff --git a/program-data-separation/cpp/lora_example/build_example.sh b/program-data-separation/cpp/lora_example/build_example.sh
index e69de29b..6f63e825 100644
--- a/program-data-separation/cpp/lora_example/build_example.sh
+++ b/program-data-separation/cpp/lora_example/build_example.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e
+
+# Clean and create build directory if it doesn't exist
+rm -rf build
+mkdir -p build
+cd build
+
+# Configure CMake
+cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LORA_DEMO=True ../..
+
+# Build the project
+cmake --build . -j$(nproc)
+
+echo "Build complete! Executable located at: ./build/bin/executorch_program_data_separation"
diff --git a/program-data-separation/cpp/lora_example/main.cpp b/program-data-separation/cpp/lora_example/main.cpp
index e69de29b..25aca0d3 100644
--- a/program-data-separation/cpp/lora_example/main.cpp
+++ b/program-data-separation/cpp/lora_example/main.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ * @lint-ignore-every CLANGTIDY facebook-hte-Deprecated
+ */
+#include <gflags/gflags.h>
+
+#include <executorch/examples/models/llama/runner/runner.h>
+
+#if defined(ET_USE_THREADPOOL)
+#include <executorch/extension/threadpool/cpuinfo_utils.h>
+#include <executorch/extension/threadpool/threadpool.h>
+#endif
+
+DEFINE_string(lora_model_path, "llama_3_2_1B_lora.pte",
+              "LoRA model serialized in flatbuffer format.");
+DEFINE_string(llama_model_path, "llama_3_2_1B.pte",
+              "Model serialized in flatbuffer format.");
+DEFINE_string(data_path, "foundation.ptd",
+              "Data serialized in flatbuffer format.");
+
+DEFINE_string(tokenizer_path, "tokenizer.model", "Tokenizer stuff.");
+
+DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt.");
+
+DEFINE_double(temperature, 0,
+              "Temperature; Default is 0. 0 = greedy argmax sampling "
+              "(deterministic). Lower temperature = more deterministic");
+
+DEFINE_int32(
+    seq_len, 128,
+    "Total number of tokens to generate (prompt + output). Defaults to "
+    "max_seq_len. If the number of input tokens + seq_len > max_seq_len, the "
+    "output will be truncated to max_seq_len tokens.");
+
+using namespace ::executorch::extension;
+
+int main(int argc, char *argv[]) {
+  ET_LOG(Info, "Running program-data separation lora example...");
+
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  const char *lora_model_path = FLAGS_lora_model_path.c_str();
+  const char *llama_model_path = FLAGS_llama_model_path.c_str();
+  const char *data_path = FLAGS_data_path.c_str();
+
+  const char *tokenizer_path = FLAGS_tokenizer_path.c_str();
+  const char *prompt = FLAGS_prompt.c_str();
+  float temperature = FLAGS_temperature;
+  int32_t seq_len = 128;
+  int32_t cpu_threads = -1;
+
+  // Create runner for lora model.
+  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> lora_runner =
+      example::create_llama_runner(lora_model_path, tokenizer_path, data_path);
+  if (lora_runner == nullptr) {
+    ET_LOG(Error, "Failed to create lora_runner.");
+    return 1;
+  }
+
+  // create runner for llama model
+  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> llama_runner =
+      example::create_llama_runner(llama_model_path, tokenizer_path, data_path);
+  if (llama_runner == nullptr) {
+    ET_LOG(Error, "Failed to create llama_runner.");
+    return 1;
+  }
+
+  // generate
+  executorch::extension::llm::GenerationConfig config{
+      .seq_len = seq_len, .temperature = temperature};
+
+  auto error = lora_runner->generate(prompt, config);
+  if (error != executorch::runtime::Error::Ok) {
+    ET_LOG(Error, "Failed to generate with lora_runner, error code %zu.",
+           error);
+    return 1;
+  }
+
+  ET_LOG(Info, "Generating with llama...");
+  error = llama_runner->generate(prompt, config);
+  if (error != executorch::runtime::Error::Ok) {
+    ET_LOG(Error, "Failed to generate with llama_runner, error code %zu.",
+           error);
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/program-data-separation/export_lora.sh b/program-data-separation/export_lora.sh
index e69de29b..082de33b 100644
--- a/program-data-separation/export_lora.sh
+++ b/program-data-separation/export_lora.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+python -m pip install torchtune==0.7.0.dev20250730  --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+
+# Download the HF snapshot (checkpoint, params, adapter files, tokenizer) and capture its local path.
+DOWNLOADED_PATH=$(python -c "
+from huggingface_hub import snapshot_download
+path=snapshot_download(
+    repo_id=\"lucylq/llama3_1B_lora\",
+)
+import os
+print(path)
+")
+
+# Copy the tokenizer into the current directory; the C++ demo takes it via --tokenizer_path.
+cp "${DOWNLOADED_PATH}/tokenizer.model" .
+
+# Export the base (non-LoRA) model: writes ${MODEL}.pte, with foundation weights in ${MODEL}.ptd.
+MODEL="llama_3_2_1B"
+python -m executorch.extension.llm.export.export_llm \
+    base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+    base.params="${DOWNLOADED_PATH}/params.json" \
+    base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+    model.use_kv_cache=true \
+    model.use_sdpa_with_kv_cache=true \
+    model.dtype_override="fp32" \
+    backend.xnnpack.enabled=true \
+    backend.xnnpack.extended_ops=true \
+    export.output_name="${MODEL}.pte" \
+    export.foundation_weights_file="${MODEL}.ptd"
+
+# Export the LoRA model: writes ${LORA_MODEL}.pte, with foundation weights in foundation.ptd.
+LORA_MODEL="llama_3_2_1B_lora"
+python -m executorch.extension.llm.export.export_llm \
+    base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+    base.params="${DOWNLOADED_PATH}/params.json" \
+    base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+    base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+    base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+    model.use_kv_cache=true \
+    model.use_sdpa_with_kv_cache=true \
+    model.dtype_override="fp32" \
+    backend.xnnpack.enabled=true \
+    backend.xnnpack.extended_ops=true \
+    export.output_name="${LORA_MODEL}.pte" \
+    export.foundation_weights_file="foundation.ptd"

From f2f06c1ebc34aec8c3b32098a1b40ef265a87232 Mon Sep 17 00:00:00 2001
From: lucylq <lfq@meta.com>
Date: Wed, 20 Aug 2025 15:35:12 -0700
Subject: [PATCH 3/3] weight sharing

---
 program-data-separation/cpp/CMakeLists.txt    |  2 -
 .../cpp/lora_example/README.md                |  4 +-
 .../cpp/lora_example/build_example.sh         |  2 +-
 .../cpp/lora_example/main.cpp                 | 84 +++++++++++++------
 4 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/program-data-separation/cpp/CMakeLists.txt b/program-data-separation/cpp/CMakeLists.txt
index ac7d9112..44e83a9e 100644
--- a/program-data-separation/cpp/CMakeLists.txt
+++ b/program-data-separation/cpp/CMakeLists.txt
@@ -41,8 +41,6 @@ if(EXECUTORCH_BUILD_LINEAR_DEMO)
 endif()
 if(EXECUTORCH_BUILD_LORA_DEMO)
   list(APPEND DEMO_SOURCES "lora_example/main.cpp")
-  add_subdirectory("executorch/examples/models/llama/runner")
-  list(APPEND LINK_LIBS llama_runner)
 endif()
 
 # Create executable
diff --git a/program-data-separation/cpp/lora_example/README.md b/program-data-separation/cpp/lora_example/README.md
index 9f89f03e..44f158c0 100644
--- a/program-data-separation/cpp/lora_example/README.md
+++ b/program-data-separation/cpp/lora_example/README.md
@@ -76,7 +76,9 @@ sh build_example.sh
 ```
 
 ## Run the executable.
-```
+```bash
+cd ~/executorch-examples/program-data-separation/cpp/lora_example
+
 ./build/bin/executorch_program_data_separation --lora_model_path=../../llama_3_2_1B_lora.pte --llama_model_path=../../llama_3_2_1B.pte --tokenizer_path=../../tokenizer.model --data_path=../../foundation.ptd
 ```
 
diff --git a/program-data-separation/cpp/lora_example/build_example.sh b/program-data-separation/cpp/lora_example/build_example.sh
index 6f63e825..0b4d194a 100644
--- a/program-data-separation/cpp/lora_example/build_example.sh
+++ b/program-data-separation/cpp/lora_example/build_example.sh
@@ -7,7 +7,7 @@ mkdir -p build
 cd build
 
 # Configure CMake
-cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LORA_DEMO=True ../..
+cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_LORA_DEMO=True -DEXECUTORCH_XNNPACK_ENABLE_WEIGHT_CACHE=True ../..
 
 # Build the project
 cmake --build . -j$(nproc)
diff --git a/program-data-separation/cpp/lora_example/main.cpp b/program-data-separation/cpp/lora_example/main.cpp
index 25aca0d3..ab33d958 100644
--- a/program-data-separation/cpp/lora_example/main.cpp
+++ b/program-data-separation/cpp/lora_example/main.cpp
@@ -6,9 +6,18 @@
  * LICENSE file in the root directory of this source tree.
  * @lint-ignore-every CLANGTIDY facebook-hte-Deprecated
  */
+
+#include <memory>
+#include <string>
+#include <vector>
+
 #include <gflags/gflags.h>
 
-#include <executorch/examples/models/llama/runner/runner.h>
+#include <executorch/extension/llm/runner/llm_runner_helper.h>
+#include <executorch/extension/llm/runner/stats.h>
+#include <executorch/extension/llm/runner/text_llm_runner.h>
+#include <executorch/extension/llm/runner/text_prefiller.h>
+#include <executorch/extension/llm/runner/text_token_generator.h>
 
 #if defined(ET_USE_THREADPOOL)
 #include <executorch/extension/threadpool/cpuinfo_utils.h>
@@ -36,7 +45,30 @@ DEFINE_int32(
     "max_seq_len. If the number of input tokens + seq_len > max_seq_len, the "
     "output will be truncated to max_seq_len tokens.");
 
-using namespace ::executorch::extension;
+using executorch::extension::Module;
+using executorch::runtime::Error;
+namespace llm = executorch::extension::llm;
+
+namespace {
+constexpr std::size_t kSpecialTokensSize = 256;
+// Builds the default special-token list: the named tokens below, then
+// "<|reserved_special_token_N|>" fillers until kSpecialTokensSize entries.
+std::unique_ptr<std::vector<std::string>> _get_default_special_tokens() {
+  auto special_tokens =
+      std::make_unique<std::vector<std::string>>(std::vector<std::string>{
+          "<|begin_of_text|>", "<|end_of_text|>",
+          "<|reserved_special_token_0|>", "<|reserved_special_token_1|>",
+          "<|finetune_right_pad_id|>", "<|step_id|>", "<|start_header_id|>",
+          "<|end_header_id|>", "<|eom_id|>", "<|eot_id|>", "<|python_tag|>"});
+  // Reserved tokens 0 and 1 are listed above, so the filler starts at 2.
+  for (std::size_t next_reserved = 2;
+       special_tokens->size() < kSpecialTokensSize; ++next_reserved) {
+    special_tokens->emplace_back("<|reserved_special_token_" +
+                                 std::to_string(next_reserved) + "|>");
+  }
+  return special_tokens;
+}
+} // namespace
 
 int main(int argc, char *argv[]) {
   ET_LOG(Info, "Running program-data separation lora example...");
@@ -50,40 +82,51 @@ int main(int argc, char *argv[]) {
   const char *tokenizer_path = FLAGS_tokenizer_path.c_str();
   const char *prompt = FLAGS_prompt.c_str();
   float temperature = FLAGS_temperature;
-  int32_t seq_len = 128;
+  // Honor the --seq_len flag instead of a hard-coded 128.
+  int32_t seq_len = FLAGS_seq_len;
   int32_t cpu_threads = -1;
 
-  // Create runner for lora model.
-  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> lora_runner =
-      example::create_llama_runner(lora_model_path, tokenizer_path, data_path);
-  if (lora_runner == nullptr) {
-    ET_LOG(Error, "Failed to create lora_runner.");
+  // Create one tokenizer per runner; each is moved into its runner below.
+  std::unique_ptr<tokenizers::Tokenizer> tokenizer1 =
+      llm::load_tokenizer(tokenizer_path, _get_default_special_tokens());
+  std::unique_ptr<tokenizers::Tokenizer> tokenizer2 =
+      llm::load_tokenizer(tokenizer_path, _get_default_special_tokens());
+
+  if (tokenizer1 == nullptr || tokenizer2 == nullptr) {
+    ET_LOG(Error,
+           "Failed to load %s as a Tiktoken, Sentencepiece or Llama2.c "
+           "tokenizer, make sure the artifact is one of these types",
+           tokenizer_path);
     return 1;
   }
 
-  // create runner for llama model
-  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> llama_runner =
-      example::create_llama_runner(llama_model_path, tokenizer_path, data_path);
-  if (llama_runner == nullptr) {
-    ET_LOG(Error, "Failed to create llama_runner.");
-    return 1;
-  }
+  // Create runners. Both are given the same data_path.
+  std::unique_ptr<llm::TextLLMRunner> llama_runner =
+      llm::create_text_llm_runner(llama_model_path, std::move(tokenizer1),
+                                  data_path, temperature);
+  std::unique_ptr<llm::TextLLMRunner> lora_runner = llm::create_text_llm_runner(
+      lora_model_path, std::move(tokenizer2), data_path, temperature);
+  // Bail out before dereferencing a runner that failed to be created.
+  if (llama_runner == nullptr || lora_runner == nullptr) {
+    ET_LOG(Error, "Failed to create llama_runner or lora_runner.");
+    return 1;
+  }
 
-  // generate
-  executorch::extension::llm::GenerationConfig config{
-      .seq_len = seq_len, .temperature = temperature};
+  // Generate with the base model first, then with the LoRA model.
+  llm::GenerationConfig config{.seq_len = seq_len, .temperature = temperature};
 
-  auto error = lora_runner->generate(prompt, config);
-  if (error != executorch::runtime::Error::Ok) {
-    ET_LOG(Error, "Failed to generate with lora_runner, error code %zu.",
-           error);
+  ET_LOG(Info, "Generating with llama...");
+  auto error = llama_runner->generate(prompt, config);
+  if (error != Error::Ok) {
+    // Cast the Error code so the argument type matches the format specifier.
+    ET_LOG(Error, "Failed to generate with llama_runner, error code %u.",
+           static_cast<unsigned int>(error));
     return 1;
   }
 
-  ET_LOG(Info, "Generating with llama...");
-  error = llama_runner->generate(prompt, config);
-  if (error != executorch::runtime::Error::Ok) {
-    ET_LOG(Error, "Failed to generate with llama_runner, error code %zu.",
-           error);
+  error = lora_runner->generate(prompt, config);
+  if (error != Error::Ok) {
+    ET_LOG(Error, "Failed to generate with lora_runner, error code %u.",
+           static_cast<unsigned int>(error));
     return 1;
   }