@@ -10,6 +10,7 @@

 #import <ExecuTorch/ExecuTorchLog.h>
 #import <executorch/examples/models/llama/runner/runner.h>
+#import <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
 #import <executorch/examples/models/llava/runner/llava_runner.h>

 using executorch::extension::llm::GenerationConfig;
@@ -32,8 +33,17 @@ - (instancetype)initWithModelPath:(NSString*)modelPath
   self = [super init];
   if (self) {
     [ExecuTorchLog.sharedLog addSink:self];
-    _runner = example::create_llama_runner(
-        modelPath.UTF8String, tokenizerPath.UTF8String);
+    // Create and load tokenizer
+    auto special_tokens = example::get_special_tokens(example::Version::Default);
+    std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+        executorch::extension::llm::load_tokenizer(tokenizerPath.UTF8String, std::move(special_tokens));
+
+    if (tokenizer == nullptr) {
+      _runner = nullptr;
+    } else {
+      _runner = executorch::extension::llm::create_text_llm_runner(
+          modelPath.UTF8String, std::move(tokenizer), std::nullopt);
+    }
   }
   return self;
 }
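Note: every call site in this diff follows the same two-step pattern, load the tokenizer first, then hand it to the generic text-LLM runner factory. A minimal standalone sketch of that flow, using only calls that appear in this diff; the include path for text_llm_runner.h and the helper name make_runner are assumptions:

#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#include <executorch/extension/llm/runner/text_llm_runner.h> // assumed path

#include <memory>
#include <optional>
#include <string>

namespace llm = ::executorch::extension::llm;

// Illustrative helper mirroring the pattern each call site now inlines.
std::unique_ptr<llm::TextLLMRunner> make_runner(
    const std::string& model_path,
    const std::string& tokenizer_path) {
  // Special tokens for the default LLaMA tokenizer version.
  auto special_tokens = example::get_special_tokens(example::Version::Default);
  // Accepts Tiktoken, Sentencepiece, or Llama2.c artifacts, per the error
  // message in main.cpp below.
  auto tokenizer = llm::load_tokenizer(tokenizer_path, std::move(special_tokens));
  if (tokenizer == nullptr) {
    return nullptr;  // Callers treat a null runner as a load failure.
  }
  // Call sites that have a data path pass it instead of std::nullopt.
  return llm::create_text_llm_runner(model_path, std::move(tokenizer), std::nullopt);
}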
24 changes: 22 additions & 2 deletions examples/models/llama/main.cpp
@@ -10,6 +10,7 @@
 #include <gflags/gflags.h>

 #include <executorch/examples/models/llama/runner/runner.h>
+#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>

 #if defined(ET_USE_THREADPOOL)
 #include <executorch/extension/threadpool/cpuinfo_utils.h>
@@ -91,8 +92,27 @@ int32_t main(int32_t argc, char** argv) {
   }
 #endif
   // create llama runner
-  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
-      example::create_llama_runner(model_path, tokenizer_path, data_path);
+  ET_LOG(
+      Info,
+      "Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
+      model_path,
+      tokenizer_path);
+
+  // Create and load tokenizer
+  auto special_tokens = example::get_special_tokens(example::Version::Default);
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+      ::executorch::extension::llm::load_tokenizer(tokenizer_path, std::move(special_tokens));
+
+  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner = nullptr;
+  if (tokenizer == nullptr) {
+    ET_LOG(
+        Info,
+        "Failed to load %s as a Tiktoken, Sentencepiece or Llama2.c tokenizer, make sure the artifact is one of these types",
+        tokenizer_path);
+  } else {
+    runner = ::executorch::extension::llm::create_text_llm_runner(
+        model_path, std::move(tokenizer), data_path);
+  }

   if (runner == nullptr) {
     ET_LOG(Error, "Failed to create llama runner");
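Note: main.cpp only builds the runner here; generation itself is unchanged further down. For context, a hedged sketch of driving the returned runner. The generate() overload and the GenerationConfig field name are assumptions, neither is spelled out in this diff:

#include <iostream>
#include <string>

// Sketch only: the signature and field name below are assumed, not from this diff.
void run_prompt(::executorch::extension::llm::TextLLMRunner& runner) {
  ::executorch::extension::llm::GenerationConfig config;
  config.max_new_tokens = 128;  // assumed field name

  // Stream each decoded piece to stdout as it is produced.
  runner.generate(
      "Once upon a time,",
      config,
      [](const std::string& piece) { std::cout << piece << std::flush; });
}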
2 changes: 2 additions & 0 deletions examples/models/llama/runner/runner.h
@@ -26,12 +26,14 @@ namespace example {

 namespace llm = ::executorch::extension::llm;

+[[deprecated("Use load_llama_tokenizer and llm::create_text_llm_runner directly")]]
 std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     std::optional<const std::string> data_path = std::nullopt,
     float temperature = -1.0f);

+[[deprecated("Use get_special_tokens and llm::load_tokenizer directly")]]
 std::unique_ptr<tokenizers::Tokenizer> load_llama_tokenizer(
     const std::string& tokenizer_path);

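Note: both declarations stay for source compatibility, and the deprecation messages name the replacements. Presumably the wrappers now just compose the new API; the body below is an assumption, since runner.cpp is not part of this diff:

// Hypothetical forwarding body for the deprecated wrapper (runner.cpp is not
// shown in this diff). Temperature forwarding is omitted because the migrated
// call sites all use the three-argument factory form.
std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
    const std::string& model_path,
    const std::string& tokenizer_path,
    std::optional<const std::string> data_path,
    float temperature) {
  (void)temperature;
  auto tokenizer = load_llama_tokenizer(tokenizer_path);
  if (tokenizer == nullptr) {
    return nullptr;
  }
  return llm::create_text_llm_runner(model_path, std::move(tokenizer), data_path);
}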
16 changes: 12 additions & 4 deletions extension/android/jni/jni_layer_llama.cpp
@@ -14,6 +14,7 @@
 #include <vector>

 #include <executorch/examples/models/llama/runner/runner.h>
+#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
 #include <executorch/examples/models/llava/runner/llava_runner.h>
 #include <executorch/extension/llm/runner/image.h>
 #include <executorch/extension/llm/runner/irunner.h>
@@ -170,10 +171,17 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
               : std::nullopt;
       // TODO(larryliu0820): Use the API in text_llm_runner.h to create the
       // runner.
-      runner_ = example::create_llama_runner(
-          model_path->toStdString(),
-          tokenizer_path->toStdString(),
-          data_path_str);
+      // Create and load tokenizer
+      auto special_tokens = example::get_special_tokens(example::Version::Default);
+      std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+          llm::load_tokenizer(tokenizer_path->toStdString(), std::move(special_tokens));
+
+      if (tokenizer == nullptr) {
+        runner_ = nullptr;
+      } else {
+        runner_ = llm::create_text_llm_runner(
+            model_path->toStdString(), std::move(tokenizer), data_path_str);
+      }
 #if defined(EXECUTORCH_BUILD_MEDIATEK)
     } else if (model_type_category == MODEL_TYPE_MEDIATEK_LLAMA) {
       runner_ = std::make_unique<MTKLlamaRunner>(
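Note: as at the other call sites, runner_ is left null when the tokenizer artifact cannot be read, so the JNI entry points still need their null guard before use. An illustrative check; the function shape and the -1 error convention are placeholders, not from this diff:

#include <executorch/extension/llm/runner/irunner.h>
#include <executorch/runtime/platform/log.h>

// Illustrative guard; assumes runner_ is held as an llm::IRunner pointer.
int ensure_runner(::executorch::extension::llm::IRunner* runner) {
  if (runner == nullptr) {
    ET_LOG(Error, "Runner not created; check the model and tokenizer paths.");
    return -1;  // placeholder error convention
  }
  return 0;
}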
13 changes: 11 additions & 2 deletions extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
@@ -9,6 +9,7 @@
#import "ResourceTestCase.h"

#import <executorch/examples/models/llama/runner/runner.h>
#import <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>

using namespace ::executorch::extension;
using namespace ::executorch::runtime;
@@ -74,8 +75,16 @@ @implementation LLaMATests
   NSString *tokenizerPath = resources[@"tokenizer"];
   return @{
     @"generate" : ^(XCTestCase *testCase){
-      auto __block runner = example::create_llama_runner(
-          modelPath.UTF8String, tokenizerPath.UTF8String);
+      // Create and load tokenizer
+      auto special_tokens = example::get_special_tokens(example::Version::Default);
+      std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+          ::executorch::extension::llm::load_tokenizer(tokenizerPath.UTF8String, std::move(special_tokens));
+
+      std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner = nullptr;
+      if (tokenizer != nullptr) {
+        runner = ::executorch::extension::llm::create_text_llm_runner(
+            modelPath.UTF8String, std::move(tokenizer), std::nullopt);
+      }
       if (!runner) {
         XCTFail("Failed to create runner");
         return;