Skip to content

Commit 9c4357a

Browse files
committed
Phi-3 runner using TextLLMRunner
1 parent fe5967a commit 9c4357a

File tree

4 files changed: +28 additions, −16 deletions

examples/models/phi-3-mini/CMakeLists.txt

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED True)
2121
set(CMAKE_BUILD_TYPE Release)
2222

2323
# Set options for executorch build.
24+
option(EXECUTORCH_BUILD_EXECUTOR_RUNNER "" OFF)
25+
option(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER "" ON)
2426
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
2527
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
2628
option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR "" ON)
@@ -40,16 +42,13 @@ endif()
4042

4143
add_executable(
4244
phi_3_mini_runner
43-
main.cpp runner.cpp
44-
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/sampler/sampler.cpp
45-
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/src/llama2c_tokenizer.cpp
46-
)
47-
target_include_directories(
48-
phi_3_mini_runner
49-
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../third-party/gflags/src
50-
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/include
45+
main.cpp
5146
)
47+
# target_include_directories(
48+
# phi_3_mini_runner
49+
# PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../third-party/gflags/src
50+
# ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/include
51+
# )
5252
target_link_libraries(
53-
phi_3_mini_runner PRIVATE executorch extension_module_static extension_tensor
54-
optimized_native_cpu_ops_lib xnnpack_backend gflags
53+
phi_3_mini_runner PUBLIC optimized_native_cpu_ops_lib xnnpack_backend gflags extension_llm_runner
5554
)

examples/models/phi-3-mini/export_phi-3-mini.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def export(args) -> None:
8282
)
8383

8484
edge_config = get_xnnpack_edge_compile_config()
85-
edge_manager = to_edge(model, compile_config=edge_config)
85+
edge_manager = to_edge(model, compile_config=edge_config, constant_methods={"get_eos_ids": [32000]})
8686
edge_manager = edge_manager.to_backend(XnnpackPartitioner())
8787
et_program = edge_manager.to_executorch()
8888

examples/models/phi-3-mini/main.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,12 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <executorch/extension/llm/runner/text_llm_runner.h>
910
#include <gflags/gflags.h>
11+
#include <pytorch/tokenizers/llama2c_tokenizer.h>
12+
#include <iostream>
1013

11-
#include <executorch/examples/models/phi-3-mini/runner.h>
14+
using executorch::extension::llm::TextLLMRunner;
1215

1316
DEFINE_string(
1417
model_path,
@@ -42,9 +45,17 @@ int main(int32_t argc, char** argv) {
4245

4346
int32_t seq_len = FLAGS_seq_len;
4447

45-
example::Runner runner(model_path, tokenizer_path, temperature);
46-
47-
runner.generate(prompt, seq_len);
48+
std::unique_ptr<tokenizers::Tokenizer> tokenizer =
49+
std::make_unique<tokenizers::Llama2cTokenizer>();
50+
tokenizer->load(tokenizer_path);
51+
std::cout << "Tokenizer loaded, eos_id = " << tokenizer->eos_tok()
52+
<< std::endl;
53+
auto runner = executorch::extension::llm::create_text_llm_runner(
54+
model_path, std::move(tokenizer));
55+
56+
runner->generate(
57+
prompt,
58+
{.seq_len = seq_len, .temperature = static_cast<float>(temperature)});
4859

4960
return 0;
5061
}

examples/models/phi-3-mini/phi_3_mini.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,13 @@ def __init__(self, model: Phi3ForCausalLM, max_batch_size: int, max_seq_len: int
3030
def forward(
3131
self,
3232
# pyre-fixme[9]: input_ids has type `LongTensor`; used as `None`.
33-
input_ids: torch.LongTensor = None,
33+
input_ids: torch.LongTensor,
34+
cache_positions: torch.Tensor,
3435
) -> torch.FloatTensor:
3536
# pyre-fixme[16]: `Phi3ForCausalLM` has no attribute `forward`.
3637
return self.model.forward(
3738
input_ids=input_ids,
39+
cache_positions=cache_positions,
3840
use_cache=True,
3941
return_dict=True,
4042
past_key_values=self.cache,

0 commit comments

Comments (0)