Fix faulty merge

jackzhxng · jackzhxng · commit aa289eafba9b · 2024-11-13T13:44:07.000-08:00
diff --git a/examples/models/llama/runner/generation.py b/examples/models/llama/runner/generation.py
@@ -53,12 +53,25 @@ def __init__(
         max_batch_size: int,
         use_kv_cache: bool,
         vocab_size: int,
+        has_full_logits: bool = False,
         device: str = "cpu",
     ):
+        """
+        Constructor.
+        Args:
+        tokenizer_path: path to tokenizer.model file.
+        max_seq_len: max length of the output sequence, after which the output will be clipped.
+        max_batch_size: max batch size.
+        use_kv_cache: whether to use a KV cache.
+        vocab_size: number of items in the vocab.
+        has_full_logits: whether the model returns the full logits or only returns the last logit.
+        device: device to run the runner on.
+        """
         self.max_seq_len = max_seq_len
         self.max_batch_size = max_batch_size
         self.use_kv_cache = use_kv_cache
         self.tokenizer = get_tokenizer(tokenizer_path)
+        self.has_full_logits = has_full_logits
         self.device = device
         assert vocab_size == self.tokenizer.n_words