
Commit 3e0eb0f

helunwencser and Lunwen He authored
Do not print eos (#4654)
* allow models to use customized token ids during export (#4649)

  Summary: Llama 3.1's [bos and eos](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/blob/main/tokenizer_config.json) differ from the token ids hardcoded in the code. This PR updates the export flow to read customized token ids instead of the hardcoded ones. It also deletes a few metadata entries that are not used by the runner.

  Pull Request resolved: #4649
  Differential Revision: D61044259
  Pulled By: helunwencser

* Do not print eos

  Summary: We don't want to print eos in the response because some eos tokens could be `<|end_of_text|>`.

  Differential Revision: D61048254

---------

Co-authored-by: Lunwen He <[email protected]>
1 parent 728a29d commit 3e0eb0f
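
A minimal sketch of the idea behind the first part of the change: prefer a token id carried in the exported model over a constant previously hardcoded in the runner. The helper name and the example values below are assumptions for illustration, not the actual ExecuTorch runner API.

#include <cstdint>
#include <cstdio>
#include <optional>

// Hypothetical helper: if the exported model provides its own token id
// (e.g. Llama 3.1's eos), use it; otherwise fall back to the old
// hardcoded default. Purely illustrative, not the real API.
int64_t resolve_token_id(std::optional<int64_t> exported_id,
                         int64_t hardcoded_default) {
  return exported_id.value_or(hardcoded_default);
}

int main() {
  // Older exports without the metadata entry keep the previous behavior...
  std::printf("fallback eos: %lld\n",
              static_cast<long long>(resolve_token_id(std::nullopt, 2)));
  // ...while an id read from the exported model wins when present
  // (128001 is used here purely as an example value).
  std::printf("exported eos: %lld\n",
              static_cast<long long>(resolve_token_id(128001, 2)));
  return 0;
}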

File tree

1 file changed: +7, -7 lines changed


examples/models/llama2/runner/runner.cpp

Lines changed: 7 additions & 7 deletions
@@ -228,19 +228,19 @@ Error Runner::generate(
       tokens_managed.resize({1, static_cast<int>(token_data.size())});
     }
 
-    // print the token as string, decode it with the Tokenizer object
-    wrapped_callback(ET_UNWRAP(tokenizer_->decode(prev_token, cur_token)));
-
-    if (shouldStop_) {
-      break;
-    }
-
     // data-dependent terminating condition: we have n_eos_ number of EOS
     if (pos >= num_prompt_tokens && cur_token == eos_id_) {
       printf("\n");
       ET_LOG(Info, "\nReached to the end of generation");
       break;
     }
+
+    // print the token as string, decode it with the Tokenizer object
+    wrapped_callback(ET_UNWRAP(tokenizer_->decode(prev_token, cur_token)));
+
+    if (shouldStop_) {
+      break;
+    }
   }
   stats_.inference_end_ms = util::time_in_ms();
   printf("\n");
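
The effect of the reordering, in a reduced, self-contained sketch: the EOS check now runs before the token is decoded and handed to the callback, so strings like `<|end_of_text|>` are never emitted as part of the response. The token ids and the toy vocabulary below are made up for illustration; the real runner calls tokenizer_->decode(prev_token, cur_token) and reads eos_id_ from the exported model.

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

int main() {
  const uint64_t eos_id = 42; // illustrative EOS id, not a real Llama id
  const std::map<uint64_t, std::string> vocab = {
      {1, "Hello"}, {2, ", world"}, {42, "<|end_of_text|>"}};
  const uint64_t generated[] = {1, 2, 42};

  for (uint64_t cur_token : generated) {
    // Check for EOS before decoding, mirroring the moved block in the diff:
    // the "<|end_of_text|>" string never reaches the output.
    if (cur_token == eos_id) {
      std::printf("\n");
      break;
    }
    // Only non-EOS tokens are decoded and surfaced to the caller.
    std::printf("%s", vocab.at(cur_token).c_str());
  }
  return 0;
}

Running this prints "Hello, world" followed by a newline, with the EOS marker suppressed.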
