@@ -44,8 +44,8 @@
  * any receiver's applicable license agreements with MediaTek Inc.
  */

-#include "executorch/backends/mediatek/runtime/include/NeuronBufferAllocator.h"
 #include <executorch/examples/mediatek/executor_runner/mtk_llama_runner.h>
+#include "executorch/backends/mediatek/runtime/include/NeuronBufferAllocator.h"

 #include <ctime>
 #include <iostream>
@@ -65,8 +65,8 @@

 #include "llama_runner/ModelChunk.h"
 #include "llama_runner/Utils.h"
-#include "llama_runner/llm_helper/include/llm_types.h"
 #include "llama_runner/llm_helper/include/llama_runner_values.h"
+#include "llama_runner/llm_helper/include/llm_types.h"

 static uint64_t MAX_RESPONSE = 50; // Maximum number of tokens to generate.
 // Global BOS and EOS option for tokenization (encoding)
@@ -83,15 +83,14 @@ using namespace mtk::vars;
 namespace llm = ::executorch::extension::llm;

 MTKLlamaRunner::MTKLlamaRunner(
-  const std::string& model_path,
-  const std::string& tokenizer_path,
-  const float temperature)
-  : modeloptions_(get_model_options()),
-    modelpaths_(get_model_paths()) {
+    const std::string& model_path,
+    const std::string& tokenizer_path,
+    const float temperature)
+    : modeloptions_(get_model_options()), modelpaths_(get_model_paths()) {
   executorch::runtime::runtime_init();
   ET_LOG(
-    Info,
-    "Creating MTK Llama runner. Currently it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
+      Info,
+      "Creating MTK Llama runner. Currently it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
 }

 Error MTKLlamaRunner::load() {
@@ -122,7 +121,6 @@ Error MTKLlamaRunner::generate(
     int32_t seq_len,
     std::function<void(const std::string&)> token_callback,
     std::function<void(const Stats&)> stats_callback) {
-
   if (!is_loaded()) {
     ET_CHECK_OK_OR_RETURN_ERROR(load());
   }
@@ -137,9 +135,9 @@ Error MTKLlamaRunner::generate(
     }
   };

-    ET_LOG(Info, "Starting inference from MTKLlamaRunner");
+  ET_LOG(Info, "Starting inference from MTKLlamaRunner");
   inference(*runtime_.get(), tokenizer_, prompt, wrapped_callback);
-    ET_LOG(Info, "Completed inference from MTKLlamaRunner");
+  ET_LOG(Info, "Completed inference from MTKLlamaRunner");

   return Error::Ok;
 }
@@ -169,7 +167,7 @@ LlamaModelOptions MTKLlamaRunner::get_model_options() {
       .cache_type = CACHE_TYPE,
       .mask_type = MASK_TYPE,
       .rot_emb_type = ROT_EMB_TYPE};
-    ET_LOG(Info, "Completed get_model_options");
+  ET_LOG(Info, "Completed get_model_options");
   return options;
 }

@@ -179,7 +177,7 @@ LlamaModelPaths MTKLlamaRunner::get_model_paths() {
       .token_embedding_path = TOKEN_EMBEDDING_PATH,
       .prompt_model_paths = split(PROMPT_MODEL_PATHS, ','),
       .gen_model_paths = split(GEN_MODEL_PATHS, ',')};
-    ET_LOG(Info, "Completed get_model_paths");
+  ET_LOG(Info, "Completed get_model_paths");
   return model_paths;
 }

@@ -325,7 +323,8 @@ Error MTKLlamaRunner::inference(
   const auto first_output_token = prefill_res.get();

   // run generation mode (decoding)
-  return gen_response(llama_runtime, tokenizer, first_output_token, token_callback);
+  return gen_response(
+      llama_runtime, tokenizer, first_output_token, token_callback);
 }

 std::unique_ptr<Tokenizer> MTKLlamaRunner::load_tokenizer() {
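For reference (not part of the commit): the API touched by this diff could be exercised roughly as follows. This is a minimal sketch; the model/tokenizer paths and the temperature value are hypothetical placeholders, and only the constructor, load(), and generate() signatures are taken from the code above.

#include <executorch/examples/mediatek/executor_runner/mtk_llama_runner.h>

#include <iostream>
#include <string>

int main() {
  // Paths are hypothetical; per the constructor's log message, the runner
  // currently self-loads its .pte, .bin, and .so files.
  MTKLlamaRunner runner(
      "/data/local/tmp/llama.pte", "/data/local/tmp/tokenizer.bin", 0.8f);

  // generate() calls load() lazily via is_loaded(), but it can be explicit.
  if (runner.load() != executorch::runtime::Error::Ok) {
    return 1;
  }

  // Stream each decoded token piece to stdout as it is produced; inside the
  // runner, MAX_RESPONSE caps generation at 50 tokens.
  runner.generate(
      "What is the capital of France?",
      /*seq_len=*/128,
      [](const std::string& piece) { std::cout << piece; },
      /*stats_callback=*/{});
  return 0;
}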