@@ -73,17 +73,22 @@ static uint64_t MAX_RESPONSE = 50; // Maximum number of tokens to generate.
 static constexpr int8_t kAddBos = 1;
 static constexpr int8_t kAddEos = 0;
 
-using namespace torch::executor;
-using namespace torch::executor::llm_helper;
-using torch::executor::utils::Timer;
+using namespace example::llm_helper;
+using example::utils::argmax;
+using example::utils::split;
+using example::utils::Timer;
+using example::utils::to_string;
+using namespace mtk::vars;
+
+namespace llm = ::executorch::extension::llm;
 
 MTKLlamaRunner::MTKLlamaRunner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     const float temperature)
     : modeloptions_(get_model_options()),
       modelpaths_(get_model_paths()) {
-  runtime_init();
+  executorch::runtime::runtime_init();
   ET_LOG(
       Info,
       "Creating MTK Llama runner. Current it will self-load .pte, .bin, and .so files. Initiated runtime_init().");
@@ -125,7 +130,7 @@ Error MTKLlamaRunner::generate(
   // Wrap the token_callback with print function
   std::function<void(const std::string&)> wrapped_callback =
       [token_callback](const std::string& piece) {
-        util::safe_printf(piece.c_str());
+        llm::safe_printf(piece.c_str());
         fflush(stdout);
         if (token_callback) {
           token_callback(piece);
@@ -172,8 +177,8 @@ LlamaModelPaths MTKLlamaRunner::get_model_paths() {
   LlamaModelPaths model_paths = {
       .tokenizer_path = TOKENIZER_PATH,
       .token_embedding_path = TOKEN_EMBEDDING_PATH,
-      .prompt_model_paths = utils::split(PROMPT_MODEL_PATHS, ','),
-      .gen_model_paths = utils::split(GEN_MODEL_PATHS, ',')};
+      .prompt_model_paths = split(PROMPT_MODEL_PATHS, ','),
+      .gen_model_paths = split(GEN_MODEL_PATHS, ',')};
   ET_LOG(Info, "Completed get_model_paths");
   return model_paths;
 }
@@ -225,8 +230,7 @@ Result<uint64_t> MTKLlamaRunner::digest_prompt(
 
   const auto vocab_size = tokenizer->vocab_size();
   const auto logits_type = llama_runtime.GetModelOptions().model_output_type;
-  const auto first_output_token =
-      utils::argmax(logits_type, logits, vocab_size);
+  const auto first_output_token = argmax(logits_type, logits, vocab_size);
   return first_output_token;
 }
 
@@ -273,7 +277,7 @@ Error MTKLlamaRunner::gen_response(
     timer_gen_token.End();
 
     prev_token = output_token;
-    output_token = utils::argmax(logits_type, logits, vocab_size);
+    output_token = argmax(logits_type, logits, vocab_size);
     full_response_tokens.push_back(output_token);
 
     // Stop when output is EOS
@@ -293,7 +297,7 @@ Error MTKLlamaRunner::gen_response(
   }
 
   std::cout << "\n\n[Generated Tokens]\n"
-            << utils::to_string(full_response_tokens) << std::endl;
+            << to_string(full_response_tokens) << std::endl;
 
   ET_LOG(
       Info,
@@ -327,7 +331,7 @@ Error MTKLlamaRunner::inference(
 std::unique_ptr<Tokenizer> MTKLlamaRunner::load_tokenizer() {
   std::unique_ptr<Tokenizer> tokenizer;
   // Assumes that tokenizer type is Tiktoken
-  tokenizer = torch::executor::get_tiktoken_for_llama();
+  tokenizer = example::get_tiktoken_for_llama();
   tokenizer->load(modelpaths_.tokenizer_path);
   return tokenizer;
 }
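Aside (not part of the diff): the change above replaces broad "using namespace torch::executor" directives with a namespace alias plus targeted using-declarations. Below is a minimal, self-contained sketch of that idiom; the *_stub namespaces and helpers are hypothetical stand-ins, not the real ExecuTorch or MTK example APIs.

// Sketch only: namespace alias + using-declarations instead of blanket
// "using namespace" directives. All names here are illustrative stubs.
#include <cstdio>
#include <string>

namespace executorch_stub {
namespace extension {
namespace llm {
// Stand-in for a printing helper reached through the short alias below.
inline void safe_printf(const char* piece) {
  if (piece != nullptr) {
    std::fputs(piece, stdout);
  }
}
} // namespace llm
} // namespace extension
} // namespace executorch_stub

namespace example_stub {
namespace utils {
// Stand-in for a small utility imported by name with a using-declaration.
inline std::string to_string(int value) {
  return std::to_string(value);
}
} // namespace utils
} // namespace example_stub

// Alias the long namespace once and qualify calls with the short alias;
// pull in individual helpers by name rather than the whole utils namespace.
namespace llm = ::executorch_stub::extension::llm;
using example_stub::utils::to_string;

int main() {
  llm::safe_printf(to_string(42).c_str());
  llm::safe_printf("\n");
  return 0;
}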