  * LICENSE file in the root directory of this source tree.
  */
 
-#ifdef EXECUTORCH_FB_BUCK
-#include <TestResourceUtils/TestResourceUtils.h>
-#endif
 #include <executorch/extension/llm/tokenizer/tiktoken.h>
 #include <executorch/runtime/platform/runtime.h>
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include <vector>
 
 using namespace ::testing;
 using ::executorch::extension::llm::Tiktoken;
@@ -49,15 +45,6 @@ static inline std::unique_ptr<std::vector<std::string>> _get_special_tokens() {
   }
   return special_tokens;
 }
-
-static inline std::string _get_resource_path(const std::string& name) {
-#ifdef EXECUTORCH_FB_BUCK
-  return facebook::xplat::testing::getPathForTestResource("resources/" + name);
-#else
-  return std::getenv("RESOURCES_PATH") + std::string("/") + name;
-#endif
-}
-
 } // namespace
 
 class TiktokenExtensionTest : public Test {
@@ -66,7 +53,8 @@ class TiktokenExtensionTest : public Test {
     executorch::runtime::runtime_init();
     tokenizer_ = std::make_unique<Tiktoken>(
         _get_special_tokens(), kBOSTokenIndex, kEOSTokenIndex);
-    modelPath_ = _get_resource_path("test_tiktoken_tokenizer.model");
+    modelPath_ = std::getenv("RESOURCES_PATH") +
+        std::string("/test_tiktoken_tokenizer.model");
   }
 
   std::unique_ptr<Tokenizer> tokenizer_;
@@ -84,15 +72,15 @@ TEST_F(TiktokenExtensionTest, DecodeWithoutLoadFails) {
 }
 
 TEST_F(TiktokenExtensionTest, TokenizerVocabSizeIsExpected) {
-  Error res = tokenizer_->load(modelPath_.c_str());
+  Error res = tokenizer_->load(modelPath_);
   EXPECT_EQ(res, Error::Ok);
   EXPECT_EQ(tokenizer_->vocab_size(), 128256);
   EXPECT_EQ(tokenizer_->bos_tok(), 128000);
   EXPECT_EQ(tokenizer_->eos_tok(), 128001);
 }
 
 TEST_F(TiktokenExtensionTest, TokenizerEncodeCorrectly) {
-  Error res = tokenizer_->load(modelPath_.c_str());
+  Error res = tokenizer_->load(modelPath_);
   EXPECT_EQ(res, Error::Ok);
   Result<std::vector<uint64_t>> out = tokenizer_->encode("hello world", 1, 0);
   EXPECT_EQ(out.error(), Error::Ok);
@@ -103,7 +91,7 @@ TEST_F(TiktokenExtensionTest, TokenizerEncodeCorrectly) {
 }
 
 TEST_F(TiktokenExtensionTest, TokenizerDecodeCorrectly) {
-  Error res = tokenizer_->load(modelPath_.c_str());
+  Error res = tokenizer_->load(modelPath_);
   EXPECT_EQ(res, Error::Ok);
   std::vector<std::string> expected = {"<|begin_of_text|>", "hello", " world"};
   std::vector<uint64_t> tokens = {128000, 15339, 1917};
@@ -115,7 +103,7 @@ TEST_F(TiktokenExtensionTest, TokenizerDecodeCorrectly) {
 }
 
 TEST_F(TiktokenExtensionTest, TokenizerDecodeOutOfRangeFails) {
-  Error res = tokenizer_->load(modelPath_.c_str());
+  Error res = tokenizer_->load(modelPath_);
   EXPECT_EQ(res, Error::Ok);
   // The vocab size is 128256, addes 256 just so the token is out of vocab
   // range.
@@ -160,31 +148,33 @@ TEST_F(TiktokenExtensionTest, LoadWithInvalidPath) {
 }
 
 TEST_F(TiktokenExtensionTest, LoadTiktokenFileWithInvalidRank) {
-  auto invalidModelPath =
-      _get_resource_path("test_tiktoken_invalid_rank.model");
-  Error res = tokenizer_->load(invalidModelPath.c_str());
+  auto invalidModelPath = std::getenv("RESOURCES_PATH") +
+      std::string("/test_tiktoken_invalid_rank.model");
+  Error res = tokenizer_->load(invalidModelPath);
 
   EXPECT_EQ(res, Error::InvalidArgument);
 }
 
 TEST_F(TiktokenExtensionTest, LoadTiktokenFileWithInvalidBase64) {
-  auto invalidModelPath =
-      _get_resource_path("test_tiktoken_invalid_base64.model");
-  Error res = tokenizer_->load(invalidModelPath.c_str());
+  auto invalidModelPath = std::getenv("RESOURCES_PATH") +
+      std::string("/test_tiktoken_invalid_base64.model");
+  Error res = tokenizer_->load(invalidModelPath);
 
   EXPECT_EQ(res, Error::InvalidArgument);
 }
 
 TEST_F(TiktokenExtensionTest, LoadTiktokenFileWithNoSpace) {
-  auto invalidModelPath = _get_resource_path("test_tiktoken_no_space.model");
-  Error res = tokenizer_->load(invalidModelPath.c_str());
+  auto invalidModelPath = std::getenv("RESOURCES_PATH") +
+      std::string("/test_tiktoken_no_space.model");
+  Error res = tokenizer_->load(invalidModelPath);
 
   EXPECT_EQ(res, Error::InvalidArgument);
 }
 
 TEST_F(TiktokenExtensionTest, LoadTiktokenFileWithBPEFile) {
-  auto invalidModelPath = _get_resource_path("test_bpe_tokenizer.bin");
-  Error res = tokenizer_->load(invalidModelPath.c_str());
+  auto invalidModelPath =
+      std::getenv("RESOURCES_PATH") + std::string("/test_bpe_tokenizer.bin");
+  Error res = tokenizer_->load(invalidModelPath);
 
   EXPECT_EQ(res, Error::InvalidArgument);
 }