Skip to content

Commit 7ccb851

Browse files
authored
Merge branch 'lm-sys:main' into fix-llama3.1_template
2 parents 97eca5d + d161b64 commit 7ccb851

File tree

5 files changed

+184
-13
lines changed

5 files changed

+184
-13
lines changed

fastchat/model/model_adapter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2483,7 +2483,7 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
24832483

24842484
class NoSystemAdapter(BaseModelAdapter):
24852485
def match(self, model_path: str):
2486-
keyword_list = ["athene-70b"]
2486+
keyword_list = ["athene-70b", "p2l"]
24872487

24882488
for keyword in keyword_list:
24892489
if keyword == model_path.lower():

fastchat/serve/api_provider.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def get_api_provider_stream_iter(
2323
top_p,
2424
max_new_tokens,
2525
state,
26+
extra_body=None,
2627
):
2728
if model_api_dict["api_type"] == "openai":
2829
if model_api_dict.get("vision-arena", False):
@@ -246,6 +247,18 @@ def get_api_provider_stream_iter(
246247
api_key=model_api_dict["api_key"],
247248
conversation_id=state.conv_id,
248249
)
250+
elif model_api_dict["api_type"] == "p2l":
251+
prompt = conv.to_openai_api_messages()
252+
stream_iter = p2l_api_stream_iter(
253+
model_api_dict["model_name"],
254+
prompt,
255+
temperature,
256+
top_p,
257+
max_new_tokens,
258+
api_base=model_api_dict["api_base"],
259+
api_key=model_api_dict["api_key"],
260+
extra_body=extra_body,
261+
)
249262
else:
250263
raise NotImplementedError()
251264

@@ -412,6 +425,74 @@ def column_api_stream_iter(
412425
}
413426

414427

428+
def p2l_api_stream_iter(
    model_name,
    messages,
    temperature,
    top_p,
    max_new_tokens,
    api_base=None,
    api_key=None,
    extra_body=None,
):
    """Stream chat completions from a P2L (prompt-to-leaderboard) router endpoint.

    Yields dicts of the form ``{"text": <accumulated text>, "error_code": 0}``.
    On the first chunk, the dict may additionally carry:
      - "ans_model": the model the router actually answered with
        (read from ``chunk.choices[0].delta.model`` when present), and
      - "router_outputs": the router's per-model outputs
        (read from ``chunk.router_outputs`` when present).

    NOTE(review): ``temperature`` and ``top_p`` are accepted but NOT forwarded
    to the API call, and are deliberately logged as None below — presumably the
    router controls sampling itself. Confirm before changing this.
    """
    import openai

    client = openai.OpenAI(
        base_url=api_base,
        api_key=api_key or "-",
        timeout=180,
    )

    # Build a text-only copy of the messages for logging: vision messages
    # carry non-text content (e.g. image payloads) we do not want in logs.
    text_messages = []
    for message in messages:
        if isinstance(message["content"], str):  # text-only model
            text_messages.append(message)
        else:  # vision model: keep only the "text" content parts
            filtered_content_list = [
                content for content in message["content"] if content["type"] == "text"
            ]
            text_messages.append(
                {"role": message["role"], "content": filtered_content_list}
            )

    gen_params = {
        "model": model_name,
        "prompt": text_messages,
        "temperature": None,
        "top_p": None,
        "max_new_tokens": max_new_tokens,
        "extra_body": extra_body,
    }
    logger.info(f"==== request ====\n{gen_params}")

    res = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_new_tokens,
        stream=True,
        extra_body=extra_body,
    )
    text = ""
    for chunk_idx, chunk in enumerate(res):
        # Some providers emit keep-alive chunks with no choices; skip those.
        if len(chunk.choices) > 0:
            text += chunk.choices[0].delta.content or ""

            data = {
                "text": text,
                "error_code": 0,
            }

            if chunk_idx == 0:
                # Router metadata only arrives on the first chunk.
                if hasattr(chunk.choices[0].delta, "model"):
                    data["ans_model"] = chunk.choices[0].delta.model

                if hasattr(chunk, "router_outputs"):
                    data["router_outputs"] = chunk.router_outputs

            yield data
415496
def upload_openai_file_to_gcs(file_id):
416497
import openai
417498
from google.cloud import storage

fastchat/serve/gradio_web_server.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import random
1212
import time
1313
import uuid
14-
from typing import List
14+
from typing import List, Dict
1515

1616
import gradio as gr
1717
import requests
@@ -119,6 +119,8 @@ def __init__(self, model_name, is_vision=False):
119119
self.model_name = model_name
120120
self.oai_thread_id = None
121121
self.is_vision = is_vision
122+
self.ans_models = []
123+
self.router_outputs = []
122124

123125
# NOTE(chris): This could be sort of a hack since it assumes the user only uploads one image. If they can upload multiple, we should store a list of image hashes.
124126
self.has_csam_image = False
@@ -128,6 +130,12 @@ def __init__(self, model_name, is_vision=False):
128130
self.regen_support = False
129131
self.init_system_prompt(self.conv, is_vision)
130132

133+
def update_ans_models(self, ans: str) -> None:
    """Record the model name the P2L router answered with for this turn."""
    self.ans_models += [ans]
135+
136+
def update_router_outputs(self, outputs: Dict[str, float]) -> None:
    """Record the router's per-model output scores for this turn."""
    self.router_outputs += [outputs]
138+
131139
def init_system_prompt(self, conv, is_vision):
132140
system_prompt = conv.get_system_message(is_vision)
133141
if len(system_prompt) == 0:
@@ -154,6 +162,20 @@ def dict(self):
154162
}
155163
)
156164

165+
if self.ans_models:
166+
base.update(
167+
{
168+
"ans_models": self.ans_models,
169+
}
170+
)
171+
172+
if self.router_outputs:
173+
base.update(
174+
{
175+
"router_outputs": self.router_outputs,
176+
}
177+
)
178+
157179
if self.is_vision:
158180
base.update({"has_csam_image": self.has_csam_image})
159181
return base
@@ -420,7 +442,7 @@ def is_limit_reached(model_name, ip):
420442

421443

422444
def bot_response(
423-
state,
445+
state: State,
424446
temperature,
425447
top_p,
426448
max_new_tokens,
@@ -504,6 +526,8 @@ def bot_response(
504526
if not custom_system_prompt:
505527
conv.set_system_message("")
506528

529+
extra_body = None
530+
507531
if use_recommended_config:
508532
recommended_config = model_api_dict.get("recommended_config", None)
509533
if recommended_config is not None:
@@ -512,6 +536,7 @@ def bot_response(
512536
max_new_tokens = recommended_config.get(
513537
"max_new_tokens", max_new_tokens
514538
)
539+
extra_body = recommended_config.get("extra_body", None)
515540

516541
stream_iter = get_api_provider_stream_iter(
517542
conv,
@@ -521,6 +546,7 @@ def bot_response(
521546
top_p,
522547
max_new_tokens,
523548
state,
549+
extra_body=extra_body,
524550
)
525551

526552
html_code = ' <span class="cursor"></span> '
@@ -532,6 +558,18 @@ def bot_response(
532558
try:
533559
data = {"text": ""}
534560
for i, data in enumerate(stream_iter):
561+
# Change for P2L:
562+
if i == 0:
563+
if "ans_model" in data:
564+
ans_model = data.get("ans_model")
565+
566+
state.update_ans_models(ans_model)
567+
568+
if "router_outputs" in data:
569+
router_outputs = data.get("router_outputs")
570+
571+
state.update_router_outputs(router_outputs)
572+
535573
if data["error_code"] == 0:
536574
output = data["text"].strip()
537575
conv.update_last_message(output + "▌")
@@ -688,6 +726,22 @@ def bot_response(
688726
.block {
689727
overflow-y: hidden !important;
690728
}
729+
730+
.visualizer {
731+
overflow: hidden;
732+
height: 60vw;
733+
border: 1px solid lightgrey;
734+
border-radius: 10px;
735+
}
736+
737+
@media screen and (max-width: 769px) {
738+
.visualizer {
739+
height: 180vw;
740+
overflow-y: scroll;
741+
width: 100%;
742+
overflow-x: hidden;
743+
}
744+
}
691745
"""
692746

693747

fastchat/serve/gradio_web_server_multi.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,6 @@
44
"""
55

66
import argparse
7-
import pickle
8-
import time
9-
from typing import List
10-
117
import gradio as gr
128

139
from fastchat.serve.gradio_block_arena_anony import (
@@ -54,6 +50,36 @@
5450
logger = build_logger("gradio_web_server_multi", "gradio_web_server_multi.log")
5551

5652

53+
def build_visualizer():
    """Render the "Arena Visualizer" tab contents.

    Emits, in order: an intro markdown header, a collapsed accordion with
    usage instructions, and an iframe embedding the externally hosted
    topic-cluster chart. Must be called inside an active gradio Blocks
    context (gr.Markdown/gr.Accordion/gr.HTML register into it).
    """
    visualizer_markdown = """
    # 🔍 Arena Visualizer
    This tool provides an interactive way to explore how people are using Chatbot Arena.
    Using *[topic clustering](https://github.com/MaartenGr/BERTopic)*, we organized user-submitted prompts from Arena battles into broad and specific categories.
    Dive in to uncover insights about the distribution and themes of these prompts!
    """
    gr.Markdown(visualizer_markdown, elem_id="visualizer_markdown")
    expandText = "👇 Expand to see detailed instructions on how to use the visualizer"
    # Instructions start collapsed so the chart stays above the fold.
    with gr.Accordion(expandText, open=False):
        instructions = """
        - Hover Over Segments: View the category name, the number of prompts, and their percentage.
            - *On mobile devices*: Tap instead of hover.
        - Click to Explore:
            - Click on a main category to see its subcategories.
            - Click on subcategories to see example prompts in the sidebar.
        - Undo and Reset: Click the center of the chart to return to the top level.

        Visualizer is created using Arena battle data collected from 2024/6 to 2024/8.
        """
        gr.Markdown(instructions)

    # The chart itself is a static page served from GCS; the .visualizer CSS
    # class (defined in gradio_web_server's block_css) controls its sizing.
    frame = """
    <iframe class="visualizer" width="100%"
        src="https://storage.googleapis.com/public-arena-no-cors/index.html">
    </iframe>
    """
    gr.HTML(frame)
81+
82+
5783
def load_demo(context: Context, request: gr.Request):
5884
ip = get_ip(request)
5985
logger.info(f"load_demo. ip: {ip}. params: {request.query_params}")
@@ -199,12 +225,14 @@ def build_demo(
199225
arena_hard_table,
200226
show_plot=True,
201227
)
228+
if args.show_visualizer:
229+
with gr.Tab("🔍 Arena Visualizer", id=5):
230+
build_visualizer()
202231

203232
with gr.Tab("ℹ️ About Us", id=4):
204-
about = build_about()
233+
build_about()
205234

206235
context_state = gr.State(context)
207-
url_params = gr.JSON(visible=False)
208236

209237
if args.model_list_mode not in ["once", "reload"]:
210238
raise ValueError(f"Unknown model list mode: {args.model_list_mode}")
@@ -271,7 +299,8 @@ def build_demo(
271299
parser.add_argument(
272300
"--gradio-auth-path",
273301
type=str,
274-
help='Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"',
302+
help='Set the gradio authentication file path. The file should contain one or \
303+
more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"',
275304
default=None,
276305
)
277306
parser.add_argument(
@@ -286,7 +315,8 @@ def build_demo(
286315
parser.add_argument(
287316
"--gradio-root-path",
288317
type=str,
289-
help="Sets the gradio root path, eg /abc/def. Useful when running behind a reverse-proxy or at a custom URL path prefix",
318+
help="Sets the gradio root path, eg /abc/def. Useful when running behind a \
319+
reverse-proxy or at a custom URL path prefix",
290320
)
291321
parser.add_argument(
292322
"--ga-id",
@@ -305,6 +335,12 @@ def build_demo(
305335
type=str,
306336
help="Set the password for the gradio web server",
307337
)
338+
parser.add_argument(
339+
"--show-visualizer",
340+
action="store_true",
341+
default=False,
342+
help="Show the Data Visualizer tab",
343+
)
308344
args = parser.parse_args()
309345
logger.info(f"args: {args}")
310346

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ dependencies = [
1919
]
2020

2121
[project.optional-dependencies]
22-
model_worker = ["accelerate>=0.21", "peft", "sentencepiece", "torch", "transformers>=4.31.0", "protobuf"]
23-
webui = ["gradio>=4.10"]
22+
model_worker = ["accelerate>=0.21", "peft", "sentencepiece", "torch", "transformers>=4.31.0", "protobuf", "openai", "anthropic"]
23+
webui = ["gradio>=4.10", "plotly", "scipy"]
2424
train = ["einops", "flash-attn>=2.0", "wandb"]
2525
llm_judge = ["openai<1", "anthropic>=0.3", "ray"]
2626
dev = ["black==23.3.0", "pylint==2.8.2"]

0 commit comments

Comments
 (0)