
Commit e9d2191

Merge branch 'sparse_tree' of github.com:FasterDecoding/Medusa into sparse_tree

2 parents: 4054fa2 + 11af0aa

File tree: 4 files changed, +172 −158 lines

.gitignore

Lines changed: 4 additions & 1 deletion

@@ -165,4 +165,7 @@ wandb/
 
 ShareGPT_Vicuna_unfiltered/
 
-test_medusa*
+test_medusa*
+
+# test
+notebooks/test*.ipynb

medusa/model/medusa_choices.py

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+mc_sim_7b_63 = [[0], [0, 0], [1], [0, 1], [2], [0, 0, 0], [1, 0], [0, 2], [3], [0, 3], [4], [0, 4], [2, 0], [0, 5], [0, 0, 1], [5], [0, 6], [6], [0, 7], [0, 1, 0], [1, 1], [7], [0, 8], [0, 0, 2], [3, 0], [0, 9], [8], [9], [1, 0, 0], [0, 2, 0], [1, 2], [0, 0, 3], [4, 0], [2, 1], [0, 0, 4], [0, 0, 5], [0, 0, 0, 0], [0, 1, 1], [0, 0, 6], [0, 3, 0], [5, 0], [1, 3], [0, 0, 7], [0, 0, 8], [0, 0, 9], [6, 0], [0, 4, 0], [1, 4], [7, 0], [0, 1, 2], [2, 0, 0], [3, 1], [2, 2], [8, 0], [0, 5, 0], [1, 5], [1, 0, 1], [0, 2, 1], [9, 0], [0, 6, 0], [0, 0, 0, 1], [1, 6], [0, 7, 0]]
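
Each sublist in mc_sim_7b_63 appears to be one candidate path through the Medusa heads: the entry at depth d indexes into head d's top-k predictions, so [0] is head 1's top pick and [0, 0, 1] extends head 1's and head 2's top picks with head 3's second pick. A small inspection sketch under that interpretation (this snippet is hypothetical, not part of the repo):

    from collections import Counter

    from medusa.model.medusa_choices import mc_sim_7b_63

    # Hypothetical inspection snippet: summarize the sparse tree's shape.
    print(len(mc_sim_7b_63))                                 # 63 candidate paths
    print(sorted(Counter(map(len, mc_sim_7b_63)).items()))   # paths per depth
    print(max(max(p) for p in mc_sim_7b_63) + 1)             # top-k needed per head (10)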

medusa/model/medusa_model.py

Lines changed: 9 additions & 6 deletions

@@ -2,17 +2,18 @@
 import torch.nn as nn
 from transformers import PreTrainedModel, PretrainedConfig
 from .modeling_llama_kv import LlamaForCausalLM as KVLlamaForCausalLM
-from transformers import AutoTokenizer
 from .utils import *
 from .kv_cache import initialize_past_key_values
+from .medusa_choices import mc_sim_7b_63
+from transformers import AutoTokenizer
 import os
 from huggingface_hub import hf_hub_download
 
 
 class MedusaConfig(PretrainedConfig):
     def __init__(
         self,
-        medusa_num_heads=2,
+        medusa_num_heads=4,
         medusa_num_layers=1,
         base_model_name_or_path="lmsys/vicuna-7b-v1.3",
         **kwargs,
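
The MedusaConfig default grows from 2 to 4 heads. A minimal sketch of the effect, assuming the package layout shown in this diff:

    from medusa.model.medusa_model import MedusaConfig

    # Minimal sketch, assuming this diff's MedusaConfig is importable.
    cfg = MedusaConfig()                  # rely on the new defaults
    print(cfg.medusa_num_heads)           # 4 after this change (was 2)
    print(cfg.base_model_name_or_path)    # "lmsys/vicuna-7b-v1.3", unchanged
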
@@ -110,6 +111,7 @@ def get_tokenizer(self):
     def from_pretrained(
         cls,
         medusa_head_name_or_path,
+        medusa_num_heads=None,
         **kwargs,
     ):
         """
@@ -121,9 +123,12 @@ def from_pretrained(
             MedusaModel: A MedusaModel instance loaded from the given path.
         """
         medusa_config = MedusaConfig.from_pretrained(medusa_head_name_or_path)
+        if medusa_num_heads is not None:
+            medusa_config.medusa_num_heads = medusa_num_heads
         base_model = KVLlamaForCausalLM.from_pretrained(
             medusa_config.base_model_name_or_path, **kwargs
         )
+
         model = cls(
             base_model,
             medusa_config.medusa_num_heads,
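
With the new medusa_num_heads argument, a caller can load a checkpoint while overriding the head count stored in its config; left as None, the config value is used unchanged. A usage sketch (the Hub repo id below is illustrative):

    import torch
    from medusa.model.medusa_model import MedusaModel

    # Usage sketch; the repo id is illustrative, substitute your own checkpoint.
    model = MedusaModel.from_pretrained(
        "FasterDecoding/medusa-vicuna-7b-v1.3",
        medusa_num_heads=3,         # override the value in the checkpoint's config
        torch_dtype=torch.float16,  # forwarded to the base model via **kwargs
    )
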
@@ -191,7 +196,7 @@ def medusa_generate(
         max_steps=512,
         # The hyperparameters below are for the Medusa
         # top-1 prediction for the next token, top-7 predictions for the next token, top-6 predictions for the next next token.
-        medusa_choices=[1, 7, 6],
+        medusa_choices=mc_sim_7b_63,
         posterior_threshold=0.09,  # threshold validation of Medusa output
         # another threshold hyperparameter, recommended to be sqrt(posterior_threshold)
         posterior_alpha=0.3,
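
The default medusa_choices also changes shape: the old [1, 7, 6] gave a top-k per head and implicitly kept every combination (a dense 1 × 7 × 6 tree), while mc_sim_7b_63 lists 63 explicit paths whose shared prefixes form a sparse tree. A sketch of the contrast, under the path interpretation assumed above (not taken verbatim from the repo):

    import math

    from medusa.model.medusa_choices import mc_sim_7b_63

    # Sketch contrasting the two formats.
    dense = [1, 7, 6]
    print(math.prod(dense))                    # 42 leaf paths, all combinations kept
    sparse = mc_sim_7b_63
    print(len(sparse), max(map(len, sparse)))  # 63 hand-picked paths, depth up to 4
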
@@ -225,7 +230,6 @@ def medusa_generate(
         self.medusa_buffers = medusa_buffers
         self.medusa_choices = medusa_choices
 
-        medusa_topk = medusa_choices[1:]
 
         # Initialize the past key and value states
         if hasattr(self, "past_key_values"):
@@ -260,9 +264,8 @@ def medusa_generate(
             candidates, tree_candidates = generate_candidates(
                 medusa_logits,
                 logits,
-                medusa_topk,
                 medusa_buffers["tree_indices"],
-                temperature,
+                medusa_buffers["retrieve_indices"],
             )
 
             # Use tree attention to verify the candidates and get predictions
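
generate_candidates loses the medusa_topk and temperature arguments: with a fixed sparse tree, the per-head top-k and the branch set are baked into the precomputed buffers. Plausibly, tree_indices lays the candidate tokens out as one flat sequence and retrieve_indices maps each root-to-leaf path back to positions in that sequence; a rough sketch of that gather step under those assumptions (not the repo's actual implementation):

    import torch

    # Rough sketch, assuming retrieve_indices holds one row of flat-sequence
    # positions per candidate path. Not the repo's actual code.
    flat_tokens = torch.tensor([11, 22, 33, 44, 55])    # laid out via tree_indices
    retrieve_indices = torch.tensor([[0, 1, 3],
                                     [0, 2, 4]])        # two root-to-leaf paths
    candidates = flat_tokens[retrieve_indices]          # gather per-path sequences
    print(candidates)   # tensor([[11, 22, 44], [11, 33, 55]])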
