Skip to content

Commit 1e4c97a

Browse files
committed
p2l stuff
1 parent 8664268 commit 1e4c97a

File tree

5 files changed

+143
-6
lines changed

5 files changed

+143
-6
lines changed

fastchat/model/model_adapter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2489,7 +2489,7 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
24892489

24902490
class NoSystemAdapter(BaseModelAdapter):
24912491
def match(self, model_path: str):
2492-
keyword_list = ["athene-70b"]
2492+
keyword_list = ["athene-70b", "p2l"]
24932493

24942494
for keyword in keyword_list:
24952495
if keyword == model_path.lower():

fastchat/serve/api_provider.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,17 @@ def get_api_provider_stream_iter(
246246
api_key=model_api_dict["api_key"],
247247
conversation_id=state.conv_id,
248248
)
249+
elif model_api_dict["api_type"] == "p2l":
250+
prompt = conv.to_openai_api_messages()
251+
stream_iter = p2l_api_stream_iter(
252+
model_api_dict["model_name"],
253+
prompt,
254+
temperature,
255+
top_p,
256+
max_new_tokens,
257+
api_base=model_api_dict["api_base"],
258+
api_key=model_api_dict["api_key"],
259+
)
249260
else:
250261
raise NotImplementedError()
251262

@@ -412,6 +423,72 @@ def column_api_stream_iter(
412423
}
413424

414425

426+
def p2l_api_stream_iter(
    model_name,
    messages,
    temperature,
    top_p,
    max_new_tokens,
    api_base=None,
    api_key=None,
):
    """Stream a chat completion from a P2L (prompt-to-leaderboard) router endpoint.

    Yields dicts containing the accumulated "text" so far and "error_code" == 0.
    On the first chunk, the dict may additionally carry:
      - "ans_model": the model the router actually answered with
        (read from ``chunk.choices[0].delta.model`` when present), and
      - "router_outputs": the router's per-model outputs
        (read from ``chunk.router_outputs`` when present).

    NOTE(review): ``temperature`` and ``top_p`` are accepted for signature parity
    with the other ``*_api_stream_iter`` helpers but are never forwarded to the
    API — presumably the P2L endpoint picks its own decoding settings. They are
    logged as ``None`` below; confirm this is intentional.
    """
    import openai

    client = openai.OpenAI(
        base_url=api_base,
        api_key=api_key or "-",  # OpenAI client requires a non-empty key
        timeout=180,
    )

    # Build a text-only copy of the conversation purely for request logging:
    # vision messages are reduced to their "text" content parts.
    text_messages = []
    for message in messages:
        if isinstance(message["content"], str):  # text-only model
            text_messages.append(message)
        else:  # vision model: content is a list of typed parts
            filtered_content_list = [
                content for content in message["content"] if content["type"] == "text"
            ]
            text_messages.append(
                {"role": message["role"], "content": filtered_content_list}
            )

    gen_params = {
        "model": model_name,
        "prompt": text_messages,
        "temperature": None,
        "top_p": None,
        "max_new_tokens": max_new_tokens,
    }
    logger.info(f"==== request ====\n{gen_params}")

    res = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_new_tokens,
        stream=True,
    )
    text = ""
    for chunk_idx, chunk in enumerate(res):
        if len(chunk.choices) > 0:
            text += chunk.choices[0].delta.content or ""

            data = {
                "text": text,
                "error_code": 0,
            }

            # The router annotates only the first streamed chunk with its
            # routing metadata; surface it to the caller once.
            if chunk_idx == 0:
                if hasattr(chunk.choices[0].delta, "model"):
                    data["ans_model"] = chunk.choices[0].delta.model

                if hasattr(chunk, "router_outputs"):
                    data["router_outputs"] = chunk.router_outputs

            yield data
490+
491+
415492
def upload_openai_file_to_gcs(file_id):
416493
import openai
417494
from google.cloud import storage

fastchat/serve/gradio_block_arena_anony.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,25 @@ def share_click(state0, state1, model_selector0, model_selector1, request: gr.Re
175175
)
176176

177177

178-
SAMPLING_WEIGHTS = {}
178+
# Models entered into anonymous battles with uniform weight; "p2l" is given a
# far larger weight so it is sampled in nearly every battle.
_UNIFORM_WEIGHT_MODELS = (
    "claude-3-5-haiku-20241022",
    "claude-3-5-sonnet-20240620",
    "claude-3-5-sonnet-20241022",
    "gpt-4-1106-preview",
    "gpt-4-turbo-2024-04-09",
    "gpt-4o-2024-05-13",
    "gpt-4o-2024-08-06",
    "gpt-4o-mini-2024-07-18",
    "o1-mini",
    "yi-lightning",
    "llama-3.1-405b-instruct-fp8",
    "llama-3.1-8b-instruct",
    "llama-3.1-70b-instruct",
    "gemini-1.5-pro-001",
    "mistral-large-2407",
    "qwen2.5-72b-instruct",
    "gemma-2-27b-it",
    "glm-4-plus",
)
SAMPLING_WEIGHTS = {model: 1 for model in _UNIFORM_WEIGHT_MODELS}
SAMPLING_WEIGHTS["p2l"] = 1000
179197

180198
# target model sampling weights will be boosted.
181199
BATTLE_TARGETS = {}

fastchat/serve/gradio_web_server.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import random
1212
import time
1313
import uuid
14-
from typing import List
14+
from typing import List, Dict
1515

1616
import gradio as gr
1717
import requests
@@ -119,6 +119,8 @@ def __init__(self, model_name, is_vision=False):
119119
self.model_name = model_name
120120
self.oai_thread_id = None
121121
self.is_vision = is_vision
122+
self.ans_models = []
123+
self.router_outputs = []
122124

123125
# NOTE(chris): This could be sort of a hack since it assumes the user only uploads one image. If they can upload multiple, we should store a list of image hashes.
124126
self.has_csam_image = False
@@ -128,6 +130,15 @@ def __init__(self, model_name, is_vision=False):
128130
self.regen_support = False
129131
self.init_system_prompt(self.conv, is_vision)
130132

133+
def update_ans_models(self, ans: str) -> None:
    """Record the answering model name reported for the latest turn."""
    self.ans_models.append(ans)
136+
137+
def update_router_outputs(self, outputs: Dict[str, float]) -> None:
    """Record the router's per-model outputs reported for the latest turn."""
    self.router_outputs.append(outputs)
140+
141+
131142
def init_system_prompt(self, conv, is_vision):
132143
system_prompt = conv.get_system_message(is_vision)
133144
if len(system_prompt) == 0:
@@ -154,6 +165,20 @@ def dict(self):
154165
}
155166
)
156167

168+
if self.ans_models:
169+
base.update(
170+
{
171+
"ans_models": self.ans_models,
172+
}
173+
)
174+
175+
if self.router_outputs:
176+
base.update(
177+
{
178+
"router_outputs": self.router_outputs,
179+
}
180+
)
181+
157182
if self.is_vision:
158183
base.update({"has_csam_image": self.has_csam_image})
159184
return base
@@ -420,7 +445,7 @@ def is_limit_reached(model_name, ip):
420445

421446

422447
def bot_response(
423-
state,
448+
state: State,
424449
temperature,
425450
top_p,
426451
max_new_tokens,
@@ -532,6 +557,23 @@ def bot_response(
532557
try:
533558
data = {"text": ""}
534559
for i, data in enumerate(stream_iter):
560+
561+
# Change for P2L:
562+
if i == 0:
563+
564+
if "ans_model" in data:
565+
566+
ans_model = data.get("ans_model")
567+
568+
state.update_ans_models(ans_model)
569+
570+
if "router_outputs" in data:
571+
572+
router_outputs = data.get("router_outputs")
573+
574+
state.update_router_outputs(router_outputs)
575+
576+
535577
if data["error_code"] == 0:
536578
output = data["text"].strip()
537579
conv.update_last_message(output + "▌")

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ dependencies = [
1919
]
2020

2121
[project.optional-dependencies]
22-
model_worker = ["accelerate>=0.21", "peft", "sentencepiece", "torch", "transformers>=4.31.0", "protobuf"]
23-
webui = ["gradio>=4.10"]
22+
model_worker = ["accelerate>=0.21", "peft", "sentencepiece", "torch", "transformers>=4.31.0", "protobuf", "openai"]
23+
webui = ["gradio>=4.10", "plotly", "scipy"]
2424
train = ["einops", "flash-attn>=2.0", "wandb"]
2525
llm_judge = ["openai<1", "anthropic>=0.3", "ray"]
2626
dev = ["black==23.3.0", "pylint==2.8.2"]

0 commit comments

Comments
 (0)