diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm
index fc7f440d999..c9ac2415653 100644
--- a/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm
+++ b/examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm
@@ -10,6 +10,7 @@
 #import
 #import
+#import
 #import
 
 using executorch::extension::llm::GenerationConfig;
 
@@ -32,8 +33,17 @@ - (instancetype)initWithModelPath:(NSString*)modelPath
   self = [super init];
   if (self) {
     [ExecuTorchLog.sharedLog addSink:self];
-    _runner = example::create_llama_runner(
-        modelPath.UTF8String, tokenizerPath.UTF8String);
+    // Create and load tokenizer
+    auto special_tokens = example::get_special_tokens(example::Version::Default);
+    std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+        executorch::extension::llm::load_tokenizer(tokenizerPath.UTF8String, std::move(special_tokens));
+
+    if (tokenizer == nullptr) {
+      _runner = nullptr;
+    } else {
+      _runner = executorch::extension::llm::create_text_llm_runner(
+          modelPath.UTF8String, std::move(tokenizer), std::nullopt);
+    }
   }
   return self;
 }
diff --git a/examples/models/llama/main.cpp b/examples/models/llama/main.cpp
index 5d34bf932e7..39f434606d0 100644
--- a/examples/models/llama/main.cpp
+++ b/examples/models/llama/main.cpp
@@ -10,6 +10,7 @@
 #include
 
 #include
+#include
 
 #if defined(ET_USE_THREADPOOL)
 #include
@@ -91,8 +92,27 @@ int32_t main(int32_t argc, char** argv) {
   }
 #endif
   // create llama runner
-  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
-      example::create_llama_runner(model_path, tokenizer_path, data_path);
+  ET_LOG(
+      Info,
+      "Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
+      model_path,
+      tokenizer_path);
+
+  // Create and load tokenizer
+  auto special_tokens = example::get_special_tokens(example::Version::Default);
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+      ::executorch::extension::llm::load_tokenizer(tokenizer_path, std::move(special_tokens));
+
+  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner = nullptr;
+  if (tokenizer == nullptr) {
+    ET_LOG(
+        Info,
+        "Failed to load %s as a Tiktoken, Sentencepiece or Llama2.c tokenizer, make sure the artifact is one of these types",
+        tokenizer_path);
+  } else {
+    runner = ::executorch::extension::llm::create_text_llm_runner(
+        model_path, std::move(tokenizer), data_path);
+  }
 
   if (runner == nullptr) {
     ET_LOG(Error, "Failed to create llama runner");
diff --git a/examples/models/llama/runner/runner.h b/examples/models/llama/runner/runner.h
index 09a166b0109..78d867da25a 100644
--- a/examples/models/llama/runner/runner.h
+++ b/examples/models/llama/runner/runner.h
@@ -26,12 +26,14 @@ namespace example {
 
 namespace llm = ::executorch::extension::llm;
 
+[[deprecated("Use load_llama_tokenizer and llm::create_text_llm_runner directly")]]
 std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     std::optional<std::string> data_path = std::nullopt,
     float temperature = -1.0f);
 
+[[deprecated("Use get_special_tokens and llm::load_tokenizer directly")]]
 std::unique_ptr<::tokenizers::Tokenizer> load_llama_tokenizer(
     const std::string& tokenizer_path);
 
diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp
index ad1c77a92b9..8f35e2dcc26 100644
--- a/extension/android/jni/jni_layer_llama.cpp
+++ b/extension/android/jni/jni_layer_llama.cpp
@@ -14,6 +14,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
 
@@ -170,10 +171,17 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
           : std::nullopt;
       // TODO(larryliu0820): Use the API in text_llm_runner.h to create the
      // runner.
-      runner_ = example::create_llama_runner(
-          model_path->toStdString(),
-          tokenizer_path->toStdString(),
-          data_path_str);
+      // Create and load tokenizer
+      auto special_tokens = example::get_special_tokens(example::Version::Default);
+      std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+          llm::load_tokenizer(tokenizer_path->toStdString(), std::move(special_tokens));
+
+      if (tokenizer == nullptr) {
+        runner_ = nullptr;
+      } else {
+        runner_ = llm::create_text_llm_runner(
+            model_path->toStdString(), std::move(tokenizer), data_path_str);
+      }
 #if defined(EXECUTORCH_BUILD_MEDIATEK)
     } else if (model_type_category == MODEL_TYPE_MEDIATEK_LLAMA) {
       runner_ = std::make_unique(
diff --git a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
index 66f2e025749..387ee7a7d78 100644
--- a/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
+++ b/extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
@@ -9,6 +9,7 @@
 #import "ResourceTestCase.h"
 
 #import
+#import
 
 using namespace ::executorch::extension;
 using namespace ::executorch::runtime;
@@ -74,8 +75,16 @@ @implementation LLaMATests
   NSString *tokenizerPath = resources[@"tokenizer"];
   return @{
     @"generate" : ^(XCTestCase *testCase){
-      auto __block runner = example::create_llama_runner(
-          modelPath.UTF8String, tokenizerPath.UTF8String);
+      // Create and load tokenizer
+      auto special_tokens = example::get_special_tokens(example::Version::Default);
+      std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+          ::executorch::extension::llm::load_tokenizer(tokenizerPath.UTF8String, std::move(special_tokens));
+
+      std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner = nullptr;
+      if (tokenizer != nullptr) {
+        runner = ::executorch::extension::llm::create_text_llm_runner(
+            modelPath.UTF8String, std::move(tokenizer), std::nullopt);
+      }
       if (!runner) {
         XCTFail("Failed to create runner");
         return;
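
Migration note (editor's sketch, not part of the patch): every hunk above replaces example::create_llama_runner with the same two-step pattern, so a caller moving off the deprecated helper can follow the sketch below. The two ExecuTorch include paths are assumptions (the extracted diff above dropped the bracketed header names), and make_runner is a hypothetical helper name; everything else mirrors the calls introduced in main.cpp above.

#include <memory>
#include <optional>
#include <string>

// Assumed header locations; the diff above has the bracketed paths stripped.
#include <executorch/examples/models/llama/runner/runner.h> // example::get_special_tokens
#include <executorch/extension/llm/runner/text_llm_runner.h> // load_tokenizer, create_text_llm_runner

namespace llm = ::executorch::extension::llm;

// Hypothetical helper mirroring the replacement pattern used in the hunks above.
std::unique_ptr<llm::TextLLMRunner> make_runner(
    const std::string& model_path,
    const std::string& tokenizer_path) {
  // Step 1: load the tokenizer, seeded with the default llama special tokens.
  auto special_tokens = example::get_special_tokens(example::Version::Default);
  std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
      llm::load_tokenizer(tokenizer_path, std::move(special_tokens));
  if (tokenizer == nullptr) {
    // load_tokenizer signals an unrecognized tokenizer file by returning
    // nullptr (Tiktoken, Sentencepiece, or Llama2.c per the log in main.cpp).
    return nullptr;
  }
  // Step 2: hand the loaded tokenizer to the generic text-LLM runner factory.
  return llm::create_text_llm_runner(
      model_path, std::move(tokenizer), /*data_path=*/std::nullopt);
}

Both factories report failure by returning nullptr rather than throwing, which is why every call site in the patch checks the runner before use.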