 #include <executorch/extension/llm/runner/util.h>
 
 #include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
-#include <pytorch/tokenizers/llama2c_tokenizer.h>
 #include <pytorch/tokenizers/hf_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 namespace example {
 
@@ -36,6 +36,41 @@ static constexpr auto kMaxContextLen = "get_max_context_len";
 static constexpr auto kVocabSize = "get_vocab_size";
 static constexpr auto kUseKVCache = "use_kv_cache";
 static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
+
+std::unique_ptr<::tokenizers::Tokenizer> load_tokenizer(
+    const std::string& tokenizer_path) {
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer = nullptr;
+  ::tokenizers::Error err;
+
+  // First try to load as a json tokenizer.
+  {
+    auto tokenizer = std::make_unique<tokenizers::HFTokenizer>();
+    if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+      ET_LOG(Info, "Loaded json tokenizer");
+      return tokenizer;
+    }
+  }
+
+  // Try to load as tiktoken tokenizer.
+  {
+    auto tokenizer = get_tiktoken_for_llama();
+    if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+      ET_LOG(Info, "Loaded TikToken tokenizer");
+      return tokenizer;
+    }
+  }
+
+  // Try to load as BPE tokenizer.
+  {
+    auto tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
+    if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+      ET_LOG(Info, "Loaded BPE tokenizer");
+      return tokenizer;
+    }
+  }
+
+  return nullptr;
+}
 } // namespace
 
 Runner::Runner(
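
Reviewer note, not part of the diff: the new load_tokenizer() helper keys the choice of tokenizer off the load result itself, trying the HF JSON tokenizer, then tiktoken, then the llama2.c/BPE format, and returning the first loader that succeeds (or nullptr if none do). Below is a minimal standalone sketch of that try-each-format fallback pattern; the Toy* loader types and load_toy_tokenizer() are made up for illustration and are not the real tokenizers API.

// Illustration only (hypothetical Toy* types, not the real tokenizers API):
// same try-each-format-in-order shape as load_tokenizer() above.
#include <memory>
#include <string>

struct ToyTokenizer {
  virtual ~ToyTokenizer() = default;
  virtual bool load(const std::string& path) = 0; // true on success
};

struct ToyJsonTokenizer : ToyTokenizer {
  bool load(const std::string& path) override {
    // Pretend only *.json artifacts parse as this format.
    return path.size() >= 5 && path.compare(path.size() - 5, 5, ".json") == 0;
  }
};

struct ToyBinaryTokenizer : ToyTokenizer {
  bool load(const std::string& path) override {
    return !path.empty(); // accept anything non-empty as a stand-in
  }
};

// Try each format in order; the first successful load wins, otherwise nullptr.
std::unique_ptr<ToyTokenizer> load_toy_tokenizer(const std::string& path) {
  {
    auto t = std::make_unique<ToyJsonTokenizer>();
    if (t->load(path)) {
      return t;
    }
  }
  {
    auto t = std::make_unique<ToyBinaryTokenizer>();
    if (t->load(path)) {
      return t;
    }
  }
  return nullptr; // caller decides how to report the failure
}

Compared with the removed extension check in Runner::load(), this shape does not care whether the artifact is named *.json; an unrecognized file simply falls through every branch.
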
@@ -78,35 +113,15 @@ Error Runner::load() {
     return Error::Ok;
   }
   ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward"));
+
   // Load tokenizer.
-  tokenizer_ = nullptr;
-  // Check if tokenizer_path_ ends with ".json".
-  if (tokenizer_path_.size() >= 5 &&
-
-      tokenizer_path_.compare(tokenizer_path_.size() - 5, 5, ".json") == 0) {
-    tokenizer_ = std::make_unique<tokenizers::HFTokenizer>();
-    ET_LOG(Info, "Loading json tokenizer");
-    tokenizer_->load(tokenizer_path_);
+  tokenizer_ = load_tokenizer(tokenizer_path_);
+  if (tokenizer_ == nullptr) {
     ET_LOG(
-        Info, "Loaded tokenizer %s as HF tokenizer", tokenizer_path_.c_str());
-  } else {
-    ::tokenizers::Error err = tokenizer_->load(tokenizer_path_);
-    tokenizer_ = get_tiktoken_for_llama();
-    // Rely on tiktoken to throw error if the artifact is incompatible. Then we
-    // fallback to BPE tokenizer.
-    if (err != ::tokenizers::Error::Ok) {
-      ET_LOG(
-          Info,
-          "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
-          tokenizer_path_.c_str());
-      tokenizer_.reset();
-      tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>();
-      err = tokenizer_->load(tokenizer_path_);
-      ET_CHECK_TK_OK_OR_RETURN_ERROR(
-          err,
-          "Failed to load %s as a llama2.c tokenizer artifact",
-          tokenizer_path_.c_str());
-    }
+        Error,
+        "Failed to load %s as a llama2.c tokenizer artifact",
+        tokenizer_path_.c_str());
+    return ::executorch::runtime::Error::InvalidArgument;
   }
 
   ET_LOG(Info, "Reading metadata from model");
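
Reviewer note, not part of the diff: Runner::load() now reports an unloadable tokenizer to its caller as ::executorch::runtime::Error::InvalidArgument instead of terminating via ET_CHECK_TK_OK_OR_RETURN_ERROR in the BPE fallback. A hedged sketch of a call site follows; the runner object, its construction, and the warm_up() wrapper are assumptions for illustration, not part of this change.

// Assumes `runner` is an already-constructed example::Runner (construction
// arguments omitted; they are not shown in this diff).
int warm_up(example::Runner& runner) {
  const auto err = runner.load();
  if (err != ::executorch::runtime::Error::Ok) {
    // A missing or unrecognized tokenizer artifact now lands here as
    // Error::InvalidArgument rather than terminating inside load().
    return 1;
  }
  return 0;
}
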