 from medusa.model.medusa_model import MedusaModel
 from fastchat.model.model_adapter import get_conversation_template
 
-# Global variable to store the chat history
+# Global variables
 chat_history = ""
+model = None
+tokenizer = None
+conv = None
 
 
-def medusa_chat_interface(user_input):
+def load_model_function(model_name, load_in_8bit=False, load_in_4bit=False):
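+    """Load the Medusa model and tokenizer into the module-level globals."""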
+    model_name = model_name or "FasterDecoding/medusa-vicuna-7b-v1.3"
+    global model, tokenizer, conv
+
+    # 8-bit and 4-bit quantization are mutually exclusive
+    if load_in_8bit and load_in_4bit:
+        return "Error: 8-bit and 4-bit quantization cannot be enabled together."
+
+    try:
+        model = MedusaModel.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True,
+            device_map="auto",
+            load_in_8bit=load_in_8bit,
+            load_in_4bit=load_in_4bit
+        )
+        tokenizer = model.get_tokenizer()
+        conv = get_conversation_template("vicuna")
+        return "Model loaded successfully!"
+    except Exception as e:
+        return f"Error loading the model: {e}. Please check the model name and try again."
+
+
+def reset_conversation():
+    """
+    Reset the global conversation and chat history
+    """
+    global conv, chat_history
+    conv = get_conversation_template("vicuna")
+    chat_history = ""
+
+
+def medusa_chat_interface(user_input, temperature, max_steps, no_history):
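+    """Stream Medusa's reply; yields (response, chat_history) tuples as text is generated."""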
     global model, tokenizer, conv, chat_history
 
-    # Add user's input to chat history
-    chat_history += "\nYou: " + user_input
+    # Reset the conversation if no_history is checked
+    if no_history:
+        reset_conversation()
 
-    # Process the user input and get the model's response
+    # This function is a generator, so errors must be yielded rather than returned
+    if not model or not tokenizer:
+        yield "Error: Model not loaded!", chat_history
+        return
+
+    chat_history += "\nYou: " + user_input
     conv.append_message(conv.roles[0], user_input)
-    conv.append_message(conv.roles[1], '')  # Placeholder for the Medusa response
+    conv.append_message(conv.roles[1], '')
     prompt = conv.get_prompt()
-    print(prompt)
 
     input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.base_model.device)
 
-    outputs = model.medusa_generate(input_ids, temperature=0.7, max_steps=512)
+    outputs = model.medusa_generate(input_ids, temperature=temperature, max_steps=max_steps)
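+    # Each streamed output's 'text' is the full reply so far, not a delta:
+    # the loop overwrites `response` and the final value becomes the whole answer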
     response = ""
     for output in outputs:
         response = output['text']
-        # Send the current response to the output box
         yield response, chat_history
         time.sleep(0.01)
 
-    # Update chat history with the complete Medusa's response after the loop
     chat_history += "\nMedusa: " + response.strip()
 
-    return response, chat_history
+    # Yield once more so the UI shows the history that now includes this reply;
+    # a return value from a generator never reaches Gradio
+    yield response, chat_history
 
 
 if __name__ == "__main__":
-    MODEL_PATH = "FasterDecoding/medusa-vicuna-7b-v1.3"
-    model = MedusaModel.from_pretrained(
-        MODEL_PATH,
-        torch_dtype=torch.float16,
-        low_cpu_mem_usage=True,
-        device_map="auto"
+    load_model_interface = gr.Interface(
+        load_model_function,
+        [
+            gr.components.Textbox(placeholder="FasterDecoding/medusa-vicuna-7b-v1.3", label="Model Name"),
+            gr.components.Checkbox(label="Use 8-bit Quantization"),
+            gr.components.Checkbox(label="Use 4-bit Quantization"),
+        ],
+        gr.components.Textbox(label="Model Load Status", type="text"),
+        description="Load Medusa Model",
+        title="Medusa Model Loader",
+        live=False,
+        api_name="load_model"
     )
-    tokenizer = model.get_tokenizer()
-    conv = get_conversation_template("vicuna")
 
-    interface = gr.Interface(
+    # Chat Interface
+    chat_interface = gr.Interface(
         medusa_chat_interface,
-        gr.components.Textbox(placeholder="Ask Medusa..."),
-        [gr.components.Textbox(label="Medusa's Response", type="text"),
-         gr.components.Textbox(label="Chat History", type="text")],
+        [
+            gr.components.Textbox(placeholder="Ask Medusa...", label="User Input"),
+            gr.components.Slider(minimum=0, maximum=1.5, value=0.7, label="Temperature"),
+            gr.components.Slider(minimum=50, maximum=1000, value=512, step=1, label="Max Steps"),
+            gr.components.Checkbox(label="No History"),
+        ],
+        [
+            gr.components.Textbox(label="Medusa's Response", type="text"),
+            gr.components.Textbox(label="Chat History", type="text")
+        ],
         live=False,
         description="Chat with Medusa",
-        title="Medusa Chatbox"
+        title="Medusa Chatbox",
+        api_name="chat"
     )
-    interface.queue().launch()
+
+    # Combine the interfaces in a TabbedInterface
+    combined_interface = gr.TabbedInterface([load_model_interface, chat_interface],
+                                            ["Load Model", "Chat"])
+
+    # Launch the combined interface
+    combined_interface.queue().launch()
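
Since the two interfaces set api_name="load_model" and api_name="chat", the app can also be driven programmatically. Below is a minimal sketch using the gradio_client package; the local URL and the positional argument order (mirroring the component lists above) are assumptions, and the chat endpoint is consumed as a stream because medusa_chat_interface is a generator.

from gradio_client import Client

client = Client("http://127.0.0.1:7860/")  # assumed default local launch address

# Load the model first; inputs mirror the "Load Model" tab (name, 8-bit, 4-bit)
status = client.predict("FasterDecoding/medusa-vicuna-7b-v1.3", False, False,
                        api_name="/load_model")
print(status)

# Stream one chat turn; inputs mirror the "Chat" tab (text, temperature, max steps, no-history)
job = client.submit("What is Medusa decoding?", 0.7, 512, False, api_name="/chat")
for response, history in job:
    print(response)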