@@ -10,6 +10,7 @@

 #import <ExecuTorch/ExecuTorchLog.h>
 #import <executorch/examples/models/llama/runner/runner.h>
+#import <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
 #import <executorch/examples/models/llava/runner/llava_runner.h>

 using executorch::extension::llm::GenerationConfig;
@@ -32,8 +33,17 @@ - (instancetype)initWithModelPath:(NSString*)modelPath
   self = [super init];
   if (self) {
     [ExecuTorchLog.sharedLog addSink:self];
-    _runner = example::create_llama_runner(
-        modelPath.UTF8String, tokenizerPath.UTF8String);
+    // Create and load tokenizer
+    auto special_tokens = example::get_special_tokens(example::Version::Default);
+    std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+        executorch::extension::llm::load_tokenizer(tokenizerPath.UTF8String, std::move(special_tokens));
+
+    if (tokenizer == nullptr) {
+      _runner = nullptr;
+    } else {
+      _runner = executorch::extension::llm::create_text_llm_runner(
+          modelPath.UTF8String, std::move(tokenizer), std::nullopt);
+    }
   }
   return self;
 }
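Note: every call site in this diff follows the same two-step pattern, load the tokenizer first, then hand it to the generic text-LLM runner factory. A minimal standalone sketch of that flow, using only calls that appear in this diff; the include path for text_llm_runner.h and the helper name make_runner are assumptions:

#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#include <executorch/extension/llm/runner/text_llm_runner.h> // assumed path

#include <memory>
#include <optional>
#include <string>

namespace llm = ::executorch::extension::llm;

// Illustrative helper mirroring the pattern each call site now inlines.
std::unique_ptr<llm::TextLLMRunner> make_runner(
    const std::string& model_path,
    const std::string& tokenizer_path) {
  // Special tokens for the default LLaMA tokenizer version.
  auto special_tokens = example::get_special_tokens(example::Version::Default);
  // Accepts Tiktoken, Sentencepiece, or Llama2.c artifacts, per the error
  // message in main.cpp below.
  auto tokenizer = llm::load_tokenizer(tokenizer_path, std::move(special_tokens));
  if (tokenizer == nullptr) {
    return nullptr;  // Callers treat a null runner as a load failure.
  }
  // Call sites that have a data path pass it instead of std::nullopt.
  return llm::create_text_llm_runner(model_path, std::move(tokenizer), std::nullopt);
}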
24 changes: 22 additions & 2 deletions examples/models/llama/main.cpp
@@ -10,6 +10,7 @@
 #include <gflags/gflags.h>

 #include <executorch/examples/models/llama/runner/runner.h>
+#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>

 #if defined(ET_USE_THREADPOOL)
 #include <executorch/extension/threadpool/cpuinfo_utils.h>
@@ -91,8 +92,27 @@ int32_t main(int32_t argc, char** argv) {
   }
 #endif
   // create llama runner
-  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
-      example::create_llama_runner(model_path, tokenizer_path, data_path);
+  ET_LOG(
+      Info,
+      "Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
+      model_path,
+      tokenizer_path);
+
+  // Create and load tokenizer
+  auto special_tokens = example::get_special_tokens(example::Version::Default);
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+      ::executorch::extension::llm::load_tokenizer(tokenizer_path, std::move(special_tokens));
+
+  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner = nullptr;
+  if (tokenizer == nullptr) {
+    ET_LOG(
+        Info,
+        "Failed to load %s as a Tiktoken, Sentencepiece or Llama2.c tokenizer, make sure the artifact is one of these types",
+        tokenizer_path);
+  } else {
+    runner = ::executorch::extension::llm::create_text_llm_runner(
+        model_path, std::move(tokenizer), data_path);
+  }

   if (runner == nullptr) {
     ET_LOG(Error, "Failed to create llama runner");
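Note: main.cpp only builds the runner here; generation itself is unchanged further down. For context, a hedged sketch of driving the returned runner. The generate() overload and the GenerationConfig field name are assumptions, neither is spelled out in this diff:

#include <iostream>
#include <string>

// Sketch only: the signature and field name below are assumed, not from this diff.
void run_prompt(::executorch::extension::llm::TextLLMRunner& runner) {
  ::executorch::extension::llm::GenerationConfig config;
  config.max_new_tokens = 128;  // assumed field name

  // Stream each decoded piece to stdout as it is produced.
  runner.generate(
      "Once upon a time,",
      config,
      [](const std::string& piece) { std::cout << piece << std::flush; });
}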
2 changes: 2 additions & 0 deletions examples/models/llama/runner/runner.h
@@ -26,12 +26,14 @@ namespace example {

 namespace llm = ::executorch::extension::llm;

+[[deprecated("Use load_llama_tokenizer and llm::create_text_llm_runner directly")]]
 std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     std::optional<const std::string> data_path = std::nullopt,
     float temperature = -1.0f);

+[[deprecated("Use get_special_tokens and llm::load_tokenizer directly")]]
 std::unique_ptr<tokenizers::Tokenizer> load_llama_tokenizer(
     const std::string& tokenizer_path);

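Note: both declarations stay for source compatibility, and the deprecation messages name the replacements. Presumably the wrappers now just compose the new API; the body below is an assumption, since runner.cpp is not part of this diff:

// Hypothetical forwarding body for the deprecated wrapper (runner.cpp is not
// shown in this diff). Temperature forwarding is omitted because the migrated
// call sites all use the three-argument factory form.
std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
    const std::string& model_path,
    const std::string& tokenizer_path,
    std::optional<const std::string> data_path,
    float temperature) {
  (void)temperature;
  auto tokenizer = load_llama_tokenizer(tokenizer_path);
  if (tokenizer == nullptr) {
    return nullptr;
  }
  return llm::create_text_llm_runner(model_path, std::move(tokenizer), data_path);
}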
16 changes: 12 additions & 4 deletions extension/android/jni/jni_layer_llama.cpp
@@ -14,6 +14,7 @@
 #include <vector>

 #include <executorch/examples/models/llama/runner/runner.h>
+#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
 #include <executorch/examples/models/llava/runner/llava_runner.h>
 #include <executorch/extension/llm/runner/image.h>
 #include <executorch/extension/llm/runner/irunner.h>
@@ -170,10 +171,17 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
               : std::nullopt;
       // TODO(larryliu0820): Use the API in text_llm_runner.h to create the
       // runner.
-      runner_ = example::create_llama_runner(
-          model_path->toStdString(),
-          tokenizer_path->toStdString(),
-          data_path_str);
+      // Create and load tokenizer
+      auto special_tokens = example::get_special_tokens(example::Version::Default);
+      std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+          llm::load_tokenizer(tokenizer_path->toStdString(), std::move(special_tokens));
+
+      if (tokenizer == nullptr) {
+        runner_ = nullptr;
+      } else {
+        runner_ = llm::create_text_llm_runner(
+            model_path->toStdString(), std::move(tokenizer), data_path_str);
+      }
 #if defined(EXECUTORCH_BUILD_MEDIATEK)
     } else if (model_type_category == MODEL_TYPE_MEDIATEK_LLAMA) {
       runner_ = std::make_unique<MTKLlamaRunner>(
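Note: as at the other call sites, runner_ is left null when the tokenizer artifact cannot be read, so the JNI entry points still need their null guard before use. An illustrative check; the function shape and the -1 error convention are placeholders, not from this diff:

#include <executorch/extension/llm/runner/irunner.h>
#include <executorch/runtime/platform/log.h>

// Illustrative guard; assumes runner_ is held as an llm::IRunner pointer.
int ensure_runner(::executorch::extension::llm::IRunner* runner) {
  if (runner == nullptr) {
    ET_LOG(Error, "Runner not created; check the model and tokenizer paths.");
    return -1;  // placeholder error convention
  }
  return 0;
}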
13 changes: 11 additions & 2 deletions extension/benchmark/apple/Benchmark/Tests/LLaMA/LLaMATests.mm
@@ -9,6 +9,7 @@
#import "ResourceTestCase.h"

#import <executorch/examples/models/llama/runner/runner.h>
#import <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>

using namespace ::executorch::extension;
using namespace ::executorch::runtime;
@@ -74,8 +75,16 @@ @implementation LLaMATests
   NSString *tokenizerPath = resources[@"tokenizer"];
   return @{
     @"generate" : ^(XCTestCase *testCase){
-      auto __block runner = example::create_llama_runner(
-          modelPath.UTF8String, tokenizerPath.UTF8String);
+      // Create and load tokenizer
+      auto special_tokens = example::get_special_tokens(example::Version::Default);
+      std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
+          ::executorch::extension::llm::load_tokenizer(tokenizerPath.UTF8String, std::move(special_tokens));
+
+      std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner = nullptr;
+      if (tokenizer != nullptr) {
+        runner = ::executorch::extension::llm::create_text_llm_runner(
+            modelPath.UTF8String, std::move(tokenizer), std::nullopt);
+      }
       if (!runner) {
         XCTFail("Failed to create runner");
         return;