Skip to content

Commit 9c4357a

Browse files
committed
Phi-3 runner using TextLLMRunner
1 parent fe5967a commit 9c4357a

File tree

4 files changed: +28 additions, −16 deletions

examples/models/phi-3-mini/CMakeLists.txt

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
2121
set(CMAKE_BUILD_TYPE Release)
2222

2323
# Set options for executorch build.
24+
option(EXECUTORCH_BUILD_EXECUTOR_RUNNER "" OFF)
25+
option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER "" ON)
2426
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
2527
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
2628
option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR "" ON)
@@ -40,16 +42,13 @@ endif()
4042

4143
add_executable(
4244
phi_3_mini_runner
43-
main.cpp runner.cpp
44-
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/sampler/sampler.cpp
45-
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/src/llama2c_tokenizer.cpp
46-
)
47-
target_include_directories(
48-
phi_3_mini_runner
49-
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../third-party/gflags/src
50-
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/include
45+
main.cpp
5146
)
47+
# target_include_directories(
48+
# phi_3_mini_runner
49+
# PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../third-party/gflags/src
50+
# ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/include
51+
# )
5252
target_link_libraries(
53-
phi_3_mini_runner PRIVATE executorch extension_module_static extension_tensor
54-
optimized_native_cpu_ops_lib xnnpack_backend gflags
53+
phi_3_mini_runner PUBLIC optimized_native_cpu_ops_lib xnnpack_backend gflags extension_llm_runner
5554
)

examples/models/phi-3-mini/export_phi-3-mini.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def export(args) -> None:
8282
)
8383

8484
edge_config = get_xnnpack_edge_compile_config()
85-
edge_manager = to_edge(model, compile_config=edge_config)
85+
edge_manager = to_edge(model, compile_config=edge_config, constant_methods={"get_eos_ids": [32000]})
8686
edge_manager = edge_manager.to_backend(XnnpackPartitioner())
8787
et_program = edge_manager.to_executorch()
8888

examples/models/phi-3-mini/main.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,12 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <executorch/extension/llm/runner/text_llm_runner.h>
910
#include <gflags/gflags.h>
11+
#include <pytorch/tokenizers/llama2c_tokenizer.h>
12+
#include <iostream>
1013

11-
#include <executorch/examples/models/phi-3-mini/runner.h>
14+
using executorch::extension::llm::TextLLMRunner;
1215

1316
DEFINE_string(
1417
model_path,
@@ -42,9 +45,17 @@ int main(int32_t argc, char** argv) {
4245

4346
int32_t seq_len = FLAGS_seq_len;
4447

45-
example::Runner runner(model_path, tokenizer_path, temperature);
46-
47-
runner.generate(prompt, seq_len);
48+
std::unique_ptr<tokenizers::Tokenizer> tokenizer =
49+
std::make_unique<tokenizers::Llama2cTokenizer>();
50+
tokenizer->load(tokenizer_path);
51+
std::cout << "Tokenizer loaded, eos_id = " << tokenizer->eos_tok()
52+
<< std::endl;
53+
auto runner = executorch::extension::llm::create_text_llm_runner(
54+
model_path, std::move(tokenizer));
55+
56+
runner->generate(
57+
prompt,
58+
{.seq_len = seq_len, .temperature = static_cast<float>(temperature)});
4859

4960
return 0;
5061
}

examples/models/phi-3-mini/phi_3_mini.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,13 @@ def __init__(self, model: Phi3ForCausalLM, max_batch_size: int, max_seq_len: int
3030
def forward(
3131
self,
3232
# pyre-fixme[9]: input_ids has type `LongTensor`; used as `None`.
33-
input_ids: torch.LongTensor = None,
33+
input_ids: torch.LongTensor,
34+
cache_positions: torch.Tensor,
3435
) -> torch.FloatTensor:
3536
# pyre-fixme[16]: `Phi3ForCausalLM` has no attribute `forward`.
3637
return self.model.forward(
3738
input_ids=input_ids,
39+
cache_positions=cache_positions,
3840
use_cache=True,
3941
return_dict=True,
4042
past_key_values=self.cache,

0 commit comments

Comments (0)