pytorch · lucylq · Oct 3, 2025 · Oct 2, 2025 · Oct 2, 2025
@@ -37,6 +37,21 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     const std::string& tokenizer_path,
     std::optional<const std::string> data_path,
     float temperature) {
+  if (data_path.has_value()) {
+    std::vector<std::string> data_files;
+    data_files.push_back(data_path.value());
+    return create_llama_runner(
+        model_path, tokenizer_path, std::move(data_files), temperature);
+  }
+  return create_llama_runner(
+      model_path, tokenizer_path, std::vector<std::string>(), temperature);
+}
+
+std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
+    const std::string& model_path,
+    const std::string& tokenizer_path,
+    std::vector<std::string> data_files,
+    float temperature) {
   ET_LOG(
       Info,
       "Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
@@ -55,7 +70,7 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     return nullptr;
   }
   return llm::create_text_llm_runner(
-      model_path, std::move(tokenizer), data_path);
+      model_path, std::move(tokenizer), data_files);
 }
 
 } // namespace example
@@ -11,12 +11,9 @@
 
 #pragma once
 
-#include <cstdint>
-#include <functional>
 #include <memory>
 #include <optional>
 #include <string>
-#include <unordered_map>
 
 #include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
 #include <executorch/extension/llm/runner/irunner.h>
@@ -30,7 +27,13 @@ namespace llm = ::executorch::extension::llm;
 std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
-    std::optional<const std::string> data_path = std::nullopt,
+    std::optional<const std::string> data_path,
+    float temperature = -1.0f);
+
+std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
+    const std::string& model_path,
+    const std::string& tokenizer_path,
+    std::vector<std::string> data_files = {},
     float temperature = -1.0f);
 
 std::unique_ptr<tokenizers::Tokenizer> load_llama_tokenizer(

@@ -11,6 +11,7 @@
 import com.facebook.jni.HybridData;
 import com.facebook.jni.annotations.DoNotStrip;
 import java.io.File;
+import java.util.List;
 import org.pytorch.executorch.ExecuTorchRuntime;
 import org.pytorch.executorch.annotations.Experimental;
 
@@ -32,14 +33,22 @@ public class LlmModule {
 
   @DoNotStrip
   private static native HybridData initHybrid(
-      int modelType, String modulePath, String tokenizerPath, float temperature, String dataPath);
+      int modelType,
+      String modulePath,
+      String tokenizerPath,
+      float temperature,
+      List<String> dataFiles);
 
   /**
    * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
-   * data path.
+   * dataFiles.
    */
   public LlmModule(
-      int modelType, String modulePath, String tokenizerPath, float temperature, String dataPath) {
+      int modelType,
+      String modulePath,
+      String tokenizerPath,
+      float temperature,
+      List<String> dataFiles) {
     ExecuTorchRuntime runtime = ExecuTorchRuntime.getRuntime();
 
     File modelFile = new File(modulePath);
@@ -50,25 +59,35 @@ public LlmModule(
     if (!tokenizerFile.canRead() || !tokenizerFile.isFile()) {
       throw new RuntimeException("Cannot load tokenizer path " + tokenizerPath);
     }
-    mHybridData = initHybrid(modelType, modulePath, tokenizerPath, temperature, dataPath);
+
+    mHybridData = initHybrid(modelType, modulePath, tokenizerPath, temperature, dataFiles);
+  }
+
+  /**
+   * Constructs a LLM Module for a model with given type, model path, tokenizer, temperature, and
+   * data path.
+   */
+  public LlmModule(
+      int modelType, String modulePath, String tokenizerPath, float temperature, String dataPath) {
+    this(modelType, modulePath, tokenizerPath, temperature, List.of(dataPath));
   }
 
   /** Constructs a LLM Module for a model with given model path, tokenizer, temperature. */
   public LlmModule(String modulePath, String tokenizerPath, float temperature) {
-    this(MODEL_TYPE_TEXT, modulePath, tokenizerPath, temperature, null);
+    this(MODEL_TYPE_TEXT, modulePath, tokenizerPath, temperature, List.of());
   }
 
   /**
    * Constructs a LLM Module for a model with given model path, tokenizer, temperature and data
    * path.
    */
   public LlmModule(String modulePath, String tokenizerPath, float temperature, String dataPath) {
-    this(MODEL_TYPE_TEXT, modulePath, tokenizerPath, temperature, dataPath);
+    this(MODEL_TYPE_TEXT, modulePath, tokenizerPath, temperature, List.of(dataPath));
   }
 
   /** Constructs a LLM Module for a model with given path, tokenizer, and temperature. */
   public LlmModule(int modelType, String modulePath, String tokenizerPath, float temperature) {
-    this(modelType, modulePath, tokenizerPath, temperature, null);
+    this(modelType, modulePath, tokenizerPath, temperature, List.of());
   }
 
   /** Constructs a LLM Module for a model with the given LlmModuleConfig */

@@ -140,21 +140,21 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
       facebook::jni::alias_ref<jstring> model_path,
       facebook::jni::alias_ref<jstring> tokenizer_path,
       jfloat temperature,
-      facebook::jni::alias_ref<jstring> data_path) {
+      facebook::jni::alias_ref<jobject> data_files) {
     return makeCxxInstance(
         model_type_category,
         model_path,
         tokenizer_path,
         temperature,
-        data_path);
+        data_files);
   }
 
   ExecuTorchLlmJni(
       jint model_type_category,
       facebook::jni::alias_ref<jstring> model_path,
       facebook::jni::alias_ref<jstring> tokenizer_path,
       jfloat temperature,
-      facebook::jni::alias_ref<jstring> data_path = nullptr) {
+      facebook::jni::alias_ref<jobject> data_files = nullptr) {
     temperature_ = temperature;
 #if defined(ET_USE_THREADPOOL)
     // Reserve 1 thread for the main thread.
@@ -173,26 +173,39 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
           model_path->toStdString().c_str(),
           llm::load_tokenizer(tokenizer_path->toStdString()));
     } else if (model_type_category == MODEL_TYPE_CATEGORY_LLM) {
-      std::optional<const std::string> data_path_str = data_path
-          ? std::optional<const std::string>{data_path->toStdString()}
-          : std::nullopt;
+      std::vector<std::string> data_files_vector;
+      if (data_files != nullptr) {
+        // Convert Java List<String> to C++ std::vector<string>
+        auto list_class = facebook::jni::findClassStatic("java/util/List");
+        auto size_method = list_class->getMethod<jint()>("size");
+        auto get_method =
+            list_class->getMethod<facebook::jni::local_ref<jobject>(jint)>(
+                "get");
+
+        jint size = size_method(data_files);
+        for (jint i = 0; i < size; ++i) {
+          auto str_obj = get_method(data_files, i);
+          auto jstr = facebook::jni::static_ref_cast<jstring>(str_obj);
+          data_files_vector.push_back(jstr->toStdString());
+        }
+      }
       runner_ = executorch::extension::llm::create_text_llm_runner(
           model_path->toStdString(),
           llm::load_tokenizer(tokenizer_path->toStdString()),
-          data_path_str);
+          data_files_vector);
 #if defined(EXECUTORCH_BUILD_QNN)
     } else if (model_type_category == MODEL_TYPE_QNN_LLAMA) {
       std::unique_ptr<executorch::extension::Module> module = std::make_unique<
           executorch::extension::Module>(
           model_path->toStdString().c_str(),
+          data_files_set,
           executorch::extension::Module::LoadMode::MmapUseMlockIgnoreErrors);
       std::string decoder_model = "llama3"; // use llama3 for now
       runner_ = std::make_unique<example::Runner<uint16_t>>( // QNN runner
           std::move(module),
           decoder_model.c_str(),
           model_path->toStdString().c_str(),
           tokenizer_path->toStdString().c_str(),
-          data_path->toStdString().c_str(),
           "");
       model_type_category_ = MODEL_TYPE_CATEGORY_LLM;
 #endif

@@ -183,6 +183,24 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::optional<const std::string> data_path,
     float temperature) {
+  if (data_path.has_value()) {
+    std::vector<std::string> data_files;
+    data_files.push_back(data_path.value());
+    return create_text_llm_runner(
+        model_path, std::move(tokenizer), std::move(data_files), temperature);
+  }
+  return create_text_llm_runner(
+      model_path,
+      std::move(tokenizer),
+      std::vector<std::string>(),
+      temperature);
+}
+
+std::unique_ptr<TextLLMRunner> create_text_llm_runner(
+    const std::string& model_path,
+    std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
+    std::vector<std::string> data_files,
+    float temperature) {
   // Sanity check tokenizer
   if (!tokenizer || !tokenizer->is_loaded()) {
     ET_LOG(Error, "Tokenizer is null or not loaded");
@@ -191,9 +209,9 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
 
   // Create the Module
   std::unique_ptr<Module> module;
-  if (data_path.has_value()) {
+  if (data_files.size() > 0) {
     module = std::make_unique<Module>(
-        model_path, data_path.value(), Module::LoadMode::File);
+        model_path, data_files, Module::LoadMode::File);
   } else {
     module = std::make_unique<Module>(model_path, Module::LoadMode::File);
   }

@@ -101,7 +101,28 @@ ET_EXPERIMENTAL std::unordered_set<uint64_t> get_eos_ids(
 ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     const std::string& model_path,
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
-    std::optional<const std::string> data_path = std::nullopt,
+    std::optional<const std::string> data_path,
+    float temperature = -1.0f);
+
+/**
+ * @brief Creates a TextLLMRunner instance with dependency injection
+ *
+ * This factory function creates and initializes a TextLLMRunner with all
+ * necessary components for text generation using the specified model and
+ * tokenizer.
+ *
+ * @param model_path Path to the model file
+ * @param tokenizer Initialized tokenizer instance
+ * @param data_files Vector of paths to additional data required by the model
+ * @param temperature Optional temperature parameter for controlling randomness
+ * (deprecated)
+ * @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
+ * nullptr on failure
+ */
+ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
+    const std::string& model_path,
+    std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
+    std::vector<std::string> data_files = {},
     float temperature = -1.0f);
 
 /**