@@ -73,17 +73,22 @@ static uint64_t MAX_RESPONSE = 50; // Maximum number of tokens to generate.
 static constexpr int8_t kAddBos = 1;
 static constexpr int8_t kAddEos = 0;
 
-using namespace torch::executor;
-using namespace torch::executor::llm_helper;
-using torch::executor::utils::Timer;
+using namespace example::llm_helper;
+using example::utils::argmax;
+using example::utils::split;
+using example::utils::Timer;
+using example::utils::to_string;
+using namespace mtk::vars;
+
+namespace llm = ::executorch::extension::llm;
 
 MTKLlamaRunner::MTKLlamaRunner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     const float temperature)
     : modeloptions_(get_model_options()),
       modelpaths_(get_model_paths()) {
-  runtime_init();
+  executorch::runtime::runtime_init();
   ET_LOG(
       Info,
       "Creating MTK Llama runner. Current it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
@@ -125,7 +130,7 @@ Error MTKLlamaRunner::generate(
   // Wrap the token_callback with print function
   std::function<void(const std::string&)> wrapped_callback =
       [token_callback](const std::string& piece) {
-        util::safe_printf(piece.c_str());
+        llm::safe_printf(piece.c_str());
         fflush(stdout);
         if (token_callback) {
           token_callback(piece);
@@ -172,8 +177,8 @@ LlamaModelPaths MTKLlamaRunner::get_model_paths() {
   LlamaModelPaths model_paths = {
       .tokenizer_path = TOKENIZER_PATH,
       .token_embedding_path = TOKEN_EMBEDDING_PATH,
-      .prompt_model_paths = utils::split(PROMPT_MODEL_PATHS, ','),
-      .gen_model_paths = utils::split(GEN_MODEL_PATHS, ',')};
+      .prompt_model_paths = split(PROMPT_MODEL_PATHS, ','),
+      .gen_model_paths = split(GEN_MODEL_PATHS, ',')};
   ET_LOG(Info, "Completed get_model_paths");
   return model_paths;
 }
@@ -225,8 +230,7 @@ Result<uint64_t> MTKLlamaRunner::digest_prompt(
 
   const auto vocab_size = tokenizer->vocab_size();
   const auto logits_type = llama_runtime.GetModelOptions().model_output_type;
-  const auto first_output_token =
-      utils::argmax(logits_type, logits, vocab_size);
+  const auto first_output_token = argmax(logits_type, logits, vocab_size);
   return first_output_token;
 }
 
@@ -273,7 +277,7 @@ Error MTKLlamaRunner::gen_response(
     timer_gen_token.End();
 
     prev_token = output_token;
-    output_token = utils::argmax(logits_type, logits, vocab_size);
+    output_token = argmax(logits_type, logits, vocab_size);
     full_response_tokens.push_back(output_token);
 
     // Stop when output is EOS
@@ -293,7 +297,7 @@ Error MTKLlamaRunner::gen_response(
   }
 
   std::cout << "\n\n[Generated Tokens]\n"
-            << utils::to_string(full_response_tokens) << std::endl;
+            << to_string(full_response_tokens) << std::endl;
 
   ET_LOG(
       Info,
@@ -327,7 +331,7 @@ Error MTKLlamaRunner::inference(
 std::unique_ptr<Tokenizer> MTKLlamaRunner::load_tokenizer() {
   std::unique_ptr<Tokenizer> tokenizer;
   // Assumes that tokenizer type is Tiktoken
-  tokenizer = torch::executor::get_tiktoken_for_llama();
+  tokenizer = example::get_tiktoken_for_llama();
   tokenizer->load(modelpaths_.tokenizer_path);
   return tokenizer;
 }
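
Taken together, the hunks above replace the blanket `using namespace torch::executor;` imports with targeted `using` declarations from `example::utils` plus a short `llm` alias for `::executorch::extension::llm`, so call sites can drop their long qualifiers. Below is a minimal, self-contained sketch of that pattern; the helper bodies are hypothetical stand-ins written only so the example compiles on its own, not the real ExecuTorch/MTK implementations.

// Minimal sketch of the namespace cleanup pattern used in this diff.
// The helpers below are hypothetical stand-ins, not ExecuTorch/MTK code.
#include <cstdio>
#include <string>
#include <vector>

namespace example::utils {
// Stand-in for the split() helper pulled in via `using example::utils::split;`.
inline std::vector<std::string> split(const std::string& s, char delim) {
  std::vector<std::string> out;
  std::string cur;
  for (char c : s) {
    if (c == delim) {
      out.push_back(cur);
      cur.clear();
    } else {
      cur += c;
    }
  }
  out.push_back(cur);
  return out;
}
} // namespace example::utils

namespace executorch::extension::llm {
// Stand-in for safe_printf(), reached through the `llm` alias below.
inline void safe_printf(const char* piece) {
  if (piece != nullptr) {
    std::fputs(piece, stdout);
  }
}
} // namespace executorch::extension::llm

// The diff's approach: narrow `using` declarations plus a short namespace
// alias, instead of `using namespace torch::executor;`.
using example::utils::split;
namespace llm = ::executorch::extension::llm;

int main() {
  // Split a comma-separated model path list and print each entry.
  for (const auto& path : split("prompt_0.pte,prompt_1.pte", ',')) {
    llm::safe_printf((path + "\n").c_str());
  }
  return 0;
}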