
Commit e5cdff3

add base model override

1 parent c4e0c63

3 files changed: 8 additions & 1 deletion

README.md

Lines changed: 1 addition & 1 deletion
@@ -99,7 +99,7 @@ You can use the following command for launching a CLI interface:
 ```bash
 CUDA_VISIBLE_DEVICES=0 python -m medusa.inference.cli --model [path of medusa model]
 ```
-You can also pass `--load-in-8bit` or `--load-in-4bit` to load the base model in quantized format.
+You can also pass `--load-in-8bit` or `--load-in-4bit` to load the base model in quantized format. If you download the base model elsewhere, you may override base model name or path with `--base-model [path of base model]`.
 
 ### Training
 For training, please install:
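As a quick illustration of the new flag, the CLI launch command from this README section can be combined with the override (bracketed paths are placeholders, as in the README):

```bash
CUDA_VISIBLE_DEVICES=0 python -m medusa.inference.cli --model [path of medusa model] --base-model [path of base model]
```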

medusa/inference/cli.py

Lines changed: 2 additions & 0 deletions
@@ -36,6 +36,7 @@ def main(args):
     try:
         model = MedusaModel.from_pretrained(
             args.model,
+            args.base_model,
             torch_dtype=torch.float16,
             low_cpu_mem_usage=True,
             device_map="auto",
@@ -185,6 +186,7 @@ def reload_conv(conv):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, required=True, help="Model name or path.")
+    parser.add_argument("--base-model", type=str, default=None, help="Base model name or path.")
     parser.add_argument(
         "--load-in-8bit", action="store_true", help="Use 8-bit quantization"
     )

medusa/model/medusa_model.py

Lines changed: 5 additions & 0 deletions
@@ -110,6 +110,7 @@ def get_tokenizer(self):
     def from_pretrained(
         cls,
         medusa_head_name_or_path,
+        base_model=None,
         **kwargs,
     ):
         """
@@ -121,6 +122,10 @@
             MedusaModel: A MedusaModel instance loaded from the given path.
         """
         medusa_config = MedusaConfig.from_pretrained(medusa_head_name_or_path)
+        if base_model:
+            print("Overriding base model as:", base_model)
+            medusa_config.base_model_name_or_path = base_model
+
         base_model = KVLlamaForCausalLM.from_pretrained(
             medusa_config.base_model_name_or_path, **kwargs
         )
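Downstream, this change lets a Medusa head checkpoint be loaded against a base model that was downloaded separately. A minimal sketch of such a call, mirroring how `cli.py` invokes it (the paths below are placeholders, not part of this commit):

```python
import torch

from medusa.model.medusa_model import MedusaModel

# The second positional argument overrides medusa_config.base_model_name_or_path,
# so the base weights are read from the given local path instead of the path
# stored in the Medusa head's config.
model = MedusaModel.from_pretrained(
    "path/to/medusa-head-checkpoint",  # placeholder Medusa model name or path
    "path/to/local/base-model",        # placeholder base model override
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
)
tokenizer = model.get_tokenizer()
```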
