Skip to content

Commit d2f3d2c

Browse files
authored
[llm] Add sentencepiece tokenizer support to llm runner
Add sentencepiece tokenizer support
1 parent 140cc14 commit d2f3d2c

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

extension/llm/runner/text_llm_runner.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
 #include <executorch/extension/llm/runner/util.h>
 #include <pytorch/tokenizers/hf_tokenizer.h>
 #include <pytorch/tokenizers/llama2c_tokenizer.h>
+#include <pytorch/tokenizers/sentencepiece_tokenizer.h>
 #include <pytorch/tokenizers/tiktoken.h>
 
 namespace executorch::extension::llm {
@@ -252,6 +253,12 @@ std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
     return tiktoken_tokenizer;
   }
 
+  auto sp_tokenizer = std::make_unique<::tokenizers::SPTokenizer>();
+  if (sp_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded Sentencepiece tokenizer");
+    return sp_tokenizer;
+  }
+
   auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
   if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
     ET_LOG(Info, "Loaded BPE tokenizer");

0 commit comments

Comments
 (0)