2222from medusa .model .utils import *
2323from medusa .model .medusa_model import MedusaModel
2424from medusa .model .kv_cache import initialize_past_key_values
25- from medusa .model .medusa_choices import medusa_choices
25+ from medusa .model .medusa_choices import *
2626
2727def medusa_forward (input_ids , model , tokenizer , medusa_choices , temperature , posterior_threshold , posterior_alpha , max_steps = 512 ):
2828 assert input_ids .shape [0 ] == 1 , "Only support batch size 1 for now!!"
@@ -191,7 +191,7 @@ def get_model_answers(
191191 tokenizer = model .get_tokenizer ()
192192
193193 model .eval ()
194- print ('Check model state:' ,model .training )
194+ print ('Check model training state:' ,model .training )
195195
196196 cuda_visible_devices = os .environ .get ('CUDA_VISIBLE_DEVICES' )
197197 print ('CUDA VISIBLE DEVICES:' , cuda_visible_devices )
@@ -456,14 +456,20 @@ def reorg_answer_file(answer_file):
456456 help = "The posterior alpha for medusa sampling." ,
457457 )
458458
459+ parser .add_argument (
460+ "--medusa-choices" ,
461+ type = str ,
462+ default = "mc_sim_7b_63" ,
463+ help = "The medusa choices for medusa sampling." ,
464+ )
459465
460466
461467
462468
463469 args = parser .parse_args ()
464470
465471 args .model_id = args .model_id + "-temperature-" + str (args .temperature )+ "-posterior_threshold-" + str (args .posterior_threshold )+ "-posterior_alpha-" + str (args .posterior_alpha )
466-
472+ args . medusa_choices = eval ( args . medusa_choices )
467473 if args .num_gpus_total // args .num_gpus_per_model > 1 :
468474 import ray
469475
@@ -493,7 +499,7 @@ def reorg_answer_file(answer_file):
493499 args .temperature ,
494500 args .posterior_threshold ,
495501 args .posterior_alpha ,
496- medusa_choices ,
502+ args . medusa_choices ,
497503 )
498504
499505 reorg_answer_file (answer_file )