From b0ee779d88cb41d691198e3fadc9c560fa1a49f0 Mon Sep 17 00:00:00 2001 From: lucylq Date: Thu, 2 Oct 2025 11:15:46 -0700 Subject: [PATCH 1/2] Runner support for multiple ptd files Pull Request resolved: https://github.com/pytorch/executorch/pull/14159 Add `std::vector` to allow for multiple data files in the runner. ghstack-source-id: 313663231 @exported-using-ghexport Differential Revision: [D82072385](https://our.internmc.facebook.com/intern/diff/D82072385/) --- examples/models/llama/runner/runner.cpp | 17 +++++++++++++++- examples/models/llama/runner/runner.h | 11 +++++++---- extension/llm/runner/llm_runner_helper.cpp | 22 +++++++++++++++++++-- extension/llm/runner/llm_runner_helper.h | 23 +++++++++++++++++++++- 4 files changed, 65 insertions(+), 8 deletions(-) diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp index 2ba2fdf9941..19ed9f88339 100644 --- a/examples/models/llama/runner/runner.cpp +++ b/examples/models/llama/runner/runner.cpp @@ -37,6 +37,21 @@ std::unique_ptr create_llama_runner( const std::string& tokenizer_path, std::optional data_path, float temperature) { + if (data_path.has_value()) { + std::vector data_files; + data_files.push_back(data_path.value()); + return create_llama_runner( + model_path, tokenizer_path, std::move(data_files), temperature); + } + return create_llama_runner( + model_path, tokenizer_path, std::vector(), temperature); +} + +std::unique_ptr create_llama_runner( + const std::string& model_path, + const std::string& tokenizer_path, + std::vector data_files, + float temperature) { ET_LOG( Info, "Creating LLaMa runner: model_path=%s, tokenizer_path=%s", @@ -55,7 +70,7 @@ std::unique_ptr create_llama_runner( return nullptr; } return llm::create_text_llm_runner( - model_path, std::move(tokenizer), data_path); + model_path, std::move(tokenizer), data_files); } } // namespace example diff --git a/examples/models/llama/runner/runner.h b/examples/models/llama/runner/runner.h index 
f07cd4e8ee8..728ae57efa8 100644 --- a/examples/models/llama/runner/runner.h +++ b/examples/models/llama/runner/runner.h @@ -11,12 +11,9 @@ #pragma once -#include -#include #include #include #include -#include #include #include @@ -30,7 +27,13 @@ namespace llm = ::executorch::extension::llm; std::unique_ptr create_llama_runner( const std::string& model_path, const std::string& tokenizer_path, - std::optional data_path = std::nullopt, + std::optional data_path, + float temperature = -1.0f); + +std::unique_ptr create_llama_runner( + const std::string& model_path, + const std::string& tokenizer_path, + std::vector data_files = {}, float temperature = -1.0f); std::unique_ptr load_llama_tokenizer( diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp index f12de5f1d87..d1e4ff2ce45 100644 --- a/extension/llm/runner/llm_runner_helper.cpp +++ b/extension/llm/runner/llm_runner_helper.cpp @@ -183,6 +183,24 @@ std::unique_ptr create_text_llm_runner( std::unique_ptr<::tokenizers::Tokenizer> tokenizer, std::optional data_path, float temperature) { + if (data_path.has_value()) { + std::vector data_files; + data_files.push_back(data_path.value()); + return create_text_llm_runner( + model_path, std::move(tokenizer), std::move(data_files), temperature); + } + return create_text_llm_runner( + model_path, + std::move(tokenizer), + std::vector(), + temperature); +} + +std::unique_ptr create_text_llm_runner( + const std::string& model_path, + std::unique_ptr<::tokenizers::Tokenizer> tokenizer, + std::vector data_files, + float temperature) { // Sanity check tokenizer if (!tokenizer || !tokenizer->is_loaded()) { ET_LOG(Error, "Tokenizer is null or not loaded"); @@ -191,9 +209,9 @@ std::unique_ptr create_text_llm_runner( // Create the Module std::unique_ptr module; - if (data_path.has_value()) { + if (data_files.size() > 0) { module = std::make_unique( - model_path, data_path.value(), Module::LoadMode::File); + model_path, data_files, 
Module::LoadMode::File); } else { module = std::make_unique(model_path, Module::LoadMode::File); } diff --git a/extension/llm/runner/llm_runner_helper.h b/extension/llm/runner/llm_runner_helper.h index 191ea3ab090..5c109581e19 100644 --- a/extension/llm/runner/llm_runner_helper.h +++ b/extension/llm/runner/llm_runner_helper.h @@ -101,7 +101,28 @@ ET_EXPERIMENTAL std::unordered_set get_eos_ids( ET_EXPERIMENTAL std::unique_ptr create_text_llm_runner( const std::string& model_path, std::unique_ptr<::tokenizers::Tokenizer> tokenizer, - std::optional data_path = std::nullopt, + std::optional data_path, + float temperature = -1.0f); + +/** + * @brief Creates a TextLLMRunner instance with dependency injection + * + * This factory function creates and initializes a TextLLMRunner with all + * necessary components for text generation using the specified model and + * tokenizer. + * + * @param model_path Path to the model file + * @param tokenizer Initialized tokenizer instance + * @param data_files Vector of paths to additional data required by the model + * @param temperature Optional temperature parameter for controlling randomness + * (deprecated) + * @return std::unique_ptr Initialized TextLLMRunner instance, or + * nullptr on failure + */ +ET_EXPERIMENTAL std::unique_ptr create_text_llm_runner( + const std::string& model_path, + std::unique_ptr<::tokenizers::Tokenizer> tokenizer, + std::vector data_files = {}, float temperature = -1.0f); /** From 1dba02486dc4b2e75a9e6f042915cdd8ac6b9b6f Mon Sep 17 00:00:00 2001 From: lucylq Date: Thu, 2 Oct 2025 14:35:34 -0700 Subject: [PATCH 2/2] JNI support for multiple ptd files Pull Request resolved: https://github.com/pytorch/executorch/pull/14168 ^ ghstack-source-id: 313706359 Differential Revision: [D82072929](https://our.internmc.facebook.com/intern/diff/D82072929/) --- .../executorch/extension/llm/LlmModule.java | 33 +++++++++++++++---- extension/android/jni/jni_layer_llama.cpp | 29 +++++++++++----- 2 files changed, 47 
insertions(+), 15 deletions(-) diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java index 289df5defd9..f135731f26a 100644 --- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java +++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java @@ -11,6 +11,7 @@ import com.facebook.jni.HybridData; import com.facebook.jni.annotations.DoNotStrip; import java.io.File; +import java.util.List; import org.pytorch.executorch.ExecuTorchRuntime; import org.pytorch.executorch.annotations.Experimental; @@ -32,14 +33,22 @@ public class LlmModule { @DoNotStrip private static native HybridData initHybrid( - int modelType, String modulePath, String tokenizerPath, float temperature, String dataPath); + int modelType, + String modulePath, + String tokenizerPath, + float temperature, + List dataFiles); /** * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and - * data path. + * dataFiles. */ public LlmModule( - int modelType, String modulePath, String tokenizerPath, float temperature, String dataPath) { + int modelType, + String modulePath, + String tokenizerPath, + float temperature, + List dataFiles) { ExecuTorchRuntime runtime = ExecuTorchRuntime.getRuntime(); File modelFile = new File(modulePath); @@ -50,12 +59,22 @@ public LlmModule( if (!tokenizerFile.canRead() || !tokenizerFile.isFile()) { throw new RuntimeException("Cannot load tokenizer path " + tokenizerPath); } - mHybridData = initHybrid(modelType, modulePath, tokenizerPath, temperature, dataPath); + + mHybridData = initHybrid(modelType, modulePath, tokenizerPath, temperature, dataFiles); + } + + /** + * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and + * data path. 
+ */ + public LlmModule( + int modelType, String modulePath, String tokenizerPath, float temperature, String dataPath) { + this( + modelType, + modulePath, + tokenizerPath, + temperature, + dataPath == null ? List.of() : List.of(dataPath)); } /** Constructs a LLM Module for a model with given model path, tokenizer, temperature. */ public LlmModule(String modulePath, String tokenizerPath, float temperature) { - this(MODEL_TYPE_TEXT, modulePath, tokenizerPath, temperature, null); + this(MODEL_TYPE_TEXT, modulePath, tokenizerPath, temperature, List.of()); } /** * @@ -63,12 +82,12 @@ public LlmModule(String modulePath, String tokenizerPath, float temperature) { * path. */ public LlmModule(String modulePath, String tokenizerPath, float temperature, String dataPath) { - this(MODEL_TYPE_TEXT, modulePath, tokenizerPath, temperature, dataPath); + this( + MODEL_TYPE_TEXT, + modulePath, + tokenizerPath, + temperature, + dataPath == null ? List.of() : List.of(dataPath)); } /** Constructs a LLM Module for a model with given path, tokenizer, and temperature. */ public LlmModule(int modelType, String modulePath, String tokenizerPath, float temperature) { - this(modelType, modulePath, tokenizerPath, temperature, List.of()); } /** Constructs a LLM Module for a model with the given LlmModuleConfig */ diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp index cabf30c42e4..a0c90991bf7 100644 --- a/extension/android/jni/jni_layer_llama.cpp +++ b/extension/android/jni/jni_layer_llama.cpp @@ -140,13 +140,13 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { facebook::jni::alias_ref model_path, facebook::jni::alias_ref tokenizer_path, jfloat temperature, - facebook::jni::alias_ref data_path) { + facebook::jni::alias_ref data_files) { return makeCxxInstance( model_type_category, model_path, tokenizer_path, temperature, data_files); } ExecuTorchLlmJni( int model_type_category,
facebook::jni::alias_ref model_path, facebook::jni::alias_ref tokenizer_path, jfloat temperature, - facebook::jni::alias_ref data_path = nullptr) { + facebook::jni::alias_ref data_files = nullptr) { temperature_ = temperature; #if defined(ET_USE_THREADPOOL) // Reserve 1 thread for the main thread. @@ -173,18 +173,48 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { model_path->toStdString().c_str(), llm::load_tokenizer(tokenizer_path->toStdString())); } else if (model_type_category == MODEL_TYPE_CATEGORY_LLM) { - std::optional data_path_str = data_path - ? std::optional{data_path->toStdString()} - : std::nullopt; + std::vector data_files_vector; + if (data_files != nullptr) { + // Convert Java List to C++ std::vector + auto list_class = facebook::jni::findClassStatic("java/util/List"); + auto size_method = list_class->getMethod("size"); + auto get_method = + list_class->getMethod(jint)>( + "get"); + + jint size = size_method(data_files); + for (jint i = 0; i < size; ++i) { + auto str_obj = get_method(data_files, i); + auto jstr = facebook::jni::static_ref_cast(str_obj); + data_files_vector.push_back(jstr->toStdString()); + } + } runner_ = executorch::extension::llm::create_text_llm_runner( model_path->toStdString(), llm::load_tokenizer(tokenizer_path->toStdString()), - data_path_str); + data_files_vector); #if defined(EXECUTORCH_BUILD_QNN) } else if (model_type_category == MODEL_TYPE_QNN_LLAMA) { + // Convert Java List to C++ std::vector. The data_files_vector declared in + // the branch above is scoped to that branch, so convert again here. + std::vector data_files_vector; + if (data_files != nullptr) { + auto list_class = facebook::jni::findClassStatic("java/util/List"); + auto size_method = list_class->getMethod("size"); + auto get_method = + list_class->getMethod(jint)>( + "get"); + jint size = size_method(data_files); + for (jint i = 0; i < size; ++i) { + auto str_obj = get_method(data_files, i); + auto jstr = facebook::jni::static_ref_cast(str_obj); + data_files_vector.push_back(jstr->toStdString()); + } + } std::unique_ptr module = std::make_unique< executorch::extension::Module>( model_path->toStdString().c_str(), + data_files_vector, executorch::extension::Module::LoadMode::MmapUseMlockIgnoreErrors); std::string decoder_model = "llama3"; // use llama3 for now runner_ = std::make_unique>( // QNN runner @@ -192,7 +206,6 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { decoder_model.c_str(), model_path->toStdString().c_str(), tokenizer_path->toStdString().c_str(), - data_path->toStdString().c_str(), ""); model_type_category_ = MODEL_TYPE_CATEGORY_LLM;
#endif