
Commit 4c9c98f

Merge branch 'main' into moderation-log

2 parents 87b6390 + 1cd4b74

18 files changed: +579 additions, -308 deletions

README.md

Lines changed: 27 additions & 0 deletions
@@ -237,6 +237,33 @@ This is the user interface that users will interact with.
 By following these steps, you will be able to serve your models using the web UI. You can open your browser and chat with a model now.
 If the models do not show up, try to reboot the gradio web server.
 
+## Launch Chatbot Arena (side-by-side battle UI)
+
+Currently, Chatbot Arena is powered by FastChat. Here is how you can launch an instance of Chatbot Arena locally.
+
+FastChat supports popular API-based models such as OpenAI, Anthropic, Gemini, Mistral, and more. To add a custom API, please refer to the model support [doc](./docs/model_support.md). Below we take OpenAI models as an example.
+
+Create a JSON configuration file `api_endpoint.json` with the API endpoints of the models you want to serve, for example:
+```
+{
+    "gpt-4o-2024-05-13": {
+        "model_name": "gpt-4o-2024-05-13",
+        "api_base": "https://api.openai.com/v1",
+        "api_type": "openai",
+        "api_key": [Insert API Key],
+        "anony_only": false
+    }
+}
+```
+For Anthropic models, specify `"api_type": "anthropic_message"` with your Anthropic key. Similarly, for Gemini models, specify `"api_type": "gemini"`. More details can be found in [api_provider.py](https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/api_provider.py).
+
+To serve your own model using local GPUs, follow the instructions in [Serving with Web GUI](#serving-with-web-gui).
+
+Now you're ready to launch the server:
+```
+python3 -m fastchat.serve.gradio_web_server_multi --register-api-endpoint-file api_endpoint.json
+```
+
 #### (Optional): Advanced Features, Scalability, Third Party UI
 - You can register multiple model workers to a single controller, which can be used for serving a single model with higher throughput or serving multiple models at the same time. When doing so, please allocate different GPUs and ports for different model workers.
 ```
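A quick way to catch a malformed endpoint file before the Gradio server starts is to validate it up front. The snippet below is a minimal sketch, not part of this commit; the required-key list is an assumption based on the README example above, not FastChat's actual schema.

```python
# sketch: sanity-check api_endpoint.json before launching the server
import json
import sys

# assumed from the README example above, not FastChat's actual schema
REQUIRED_KEYS = {"model_name", "api_base", "api_type", "api_key"}

def validate_endpoints(path="api_endpoint.json"):
    with open(path) as f:
        endpoints = json.load(f)
    for name, cfg in endpoints.items():
        missing = REQUIRED_KEYS - cfg.keys()
        if missing:
            sys.exit(f"{name}: missing keys {sorted(missing)}")
    print(f"{len(endpoints)} endpoint(s) look well-formed")

if __name__ == "__main__":
    validate_endpoints()
```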

fastchat/constants.py

Lines changed: 2 additions & 2 deletions
@@ -9,8 +9,8 @@
 
 # Survey Link URL (to be removed) #00729c
 SURVEY_LINK = """<div style='text-align: left; margin: 20px 0;'>
-    <div style='display: inline-block; border: 2px solid #C41E3A; padding: 20px; padding-bottom: 10px; padding-top: 10px; border-radius: 5px;'>
-        <span style='color: #C41E3A; font-weight: bold;'>New Launch! Jailbreak models at <a href='https://redarena.ai' style='color: #C41E3A; text-decoration: underline;'>RedTeam Arena</a>. </span>
+    <div style='display: inline-block; border: 2px solid #00729c; padding: 20px; padding-bottom: 10px; padding-top: 10px; border-radius: 5px;'>
+        <span style='color: #00729c; font-weight: bold;'>New Launch! Copilot Arena: <a href='https://marketplace.visualstudio.com/items?itemName=copilot-arena.copilot-arena' style='color: #00729c; text-decoration: underline;'>VS Code Extension</a> to compare Top LLMs</span>
     </div>
 </div>"""
 # SURVEY_LINK = ""

fastchat/serve/api_provider.py

Lines changed: 9 additions & 73 deletions
@@ -122,12 +122,14 @@ def get_api_provider_stream_iter(
         )
     elif model_api_dict["api_type"] == "bard":
         prompt = conv.to_openai_api_messages()
-        stream_iter = bard_api_stream_iter(
+        stream_iter = gemini_api_stream_iter(
            model_api_dict["model_name"],
            prompt,
-           temperature,
-           top_p,
-           api_key=model_api_dict["api_key"],
+           None,  # use Bard's default temperature
+           None,  # use Bard's default top_p
+           max_new_tokens,
+           api_key=(model_api_dict["api_key"] or os.environ["BARD_API_KEY"]),
+           use_stream=False,
         )
     elif model_api_dict["api_type"] == "mistral":
         if model_api_dict.get("vision-arena", False):
@@ -242,6 +244,7 @@ def get_api_provider_stream_iter(
            max_new_tokens,
            api_base=model_api_dict["api_base"],
            api_key=model_api_dict["api_key"],
+           conversation_id=state.conv_id,
         )
     else:
         raise NotImplementedError()
@@ -759,75 +762,6 @@ def gemini_api_stream_iter(
     }
 
 
-def bard_api_stream_iter(model_name, conv, temperature, top_p, api_key=None):
-    del top_p  # not supported
-    del temperature  # not supported
-
-    if api_key is None:
-        api_key = os.environ["BARD_API_KEY"]
-
-    # convert conv to conv_bard
-    conv_bard = []
-    for turn in conv:
-        if turn["role"] == "user":
-            conv_bard.append({"author": "0", "content": turn["content"]})
-        elif turn["role"] == "assistant":
-            conv_bard.append({"author": "1", "content": turn["content"]})
-        else:
-            raise ValueError(f"Unsupported role: {turn['role']}")
-
-    params = {
-        "model": model_name,
-        "prompt": conv_bard,
-    }
-    logger.info(f"==== request ====\n{params}")
-
-    try:
-        res = requests.post(
-            f"https://generativelanguage.googleapis.com/v1beta2/models/{model_name}:generateMessage?key={api_key}",
-            json={
-                "prompt": {
-                    "messages": conv_bard,
-                },
-            },
-            timeout=60,
-        )
-    except Exception as e:
-        logger.error(f"==== error ====\n{e}")
-        yield {
-            "text": f"**API REQUEST ERROR** Reason: {e}.",
-            "error_code": 1,
-        }
-
-    if res.status_code != 200:
-        logger.error(f"==== error ==== ({res.status_code}): {res.text}")
-        yield {
-            "text": f"**API REQUEST ERROR** Reason: status code {res.status_code}.",
-            "error_code": 1,
-        }
-
-    response_json = res.json()
-    if "candidates" not in response_json:
-        logger.error(f"==== error ==== response blocked: {response_json}")
-        reason = response_json["filters"][0]["reason"]
-        yield {
-            "text": f"**API REQUEST ERROR** Reason: {reason}.",
-            "error_code": 1,
-        }
-
-    response = response_json["candidates"][0]["content"]
-    pos = 0
-    while pos < len(response):
-        # simulate token streaming
-        pos += 5
-        time.sleep(0.001)
-        data = {
-            "text": response[:pos],
-            "error_code": 0,
-        }
-        yield data
-
-
 def ai2_api_stream_iter(
     model_name,
     model_id,
@@ -1262,6 +1196,7 @@ def metagen_api_stream_iter(
     max_new_tokens,
     api_key,
     api_base,
+    conversation_id,
 ):
     try:
         text_messages = []
@@ -1294,6 +1229,7 @@ def metagen_api_stream_iter(
         "model": model_name,
         "chunks_delimited": True,
         "messages": messages,
+        "conversation_id": conversation_id,
         "options": {
            "max_tokens": max_new_tokens,
            "generation_algorithm": "top_p",

fastchat/serve/gradio_block_arena_anony.py

Lines changed: 9 additions & 3 deletions
@@ -60,11 +60,11 @@ def load_demo_side_by_side_anony(models_, url_params):
     global models
     models = models_
 
-    states = (None,) * num_sides
-    selector_updates = (
+    states = [None] * num_sides
+    selector_updates = [
         gr.Markdown(visible=True),
         gr.Markdown(visible=True),
-    )
+    ]
 
     return states + selector_updates
 
@@ -522,6 +522,12 @@ def build_side_by_side_ui_anony(models):
         elem_id="chatbot",
         height=650,
         show_copy_button=True,
+        latex_delimiters=[
+            {"left": "$", "right": "$", "display": False},
+            {"left": "$$", "right": "$$", "display": True},
+            {"left": r"\(", "right": r"\)", "display": False},
+            {"left": r"\[", "right": r"\]", "display": True},
+        ],
     )
 
     with gr.Row():
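Two things change here and in the sibling files below: `states` and `selector_updates` become lists (so `states + selector_updates` yields a list rather than a tuple), and every `gr.Chatbot` gains a `latex_delimiters` list so that `$...$`, `$$...$$`, `\(...\)`, and `\[...\]` all render as math. A minimal standalone sketch of the chatbot setting (demo code, not from this commit; assumes a Gradio version whose `gr.Chatbot` accepts `latex_delimiters`):

```python
import gradio as gr

# demo: a Chatbot that renders inline ($ / \( \)) and display ($$ / \[ \]) math
latex_delimiters = [
    {"left": "$", "right": "$", "display": False},
    {"left": "$$", "right": "$$", "display": True},
    {"left": r"\(", "right": r"\)", "display": False},
    {"left": r"\[", "right": r"\]", "display": True},
]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(latex_delimiters=latex_delimiters)
    # seed one exchange so the math rendering is visible on load
    demo.load(lambda: [["What is $e^{i\\pi}$?", r"\[ e^{i\pi} + 1 = 0 \]"]],
              outputs=chatbot)

if __name__ == "__main__":
    demo.launch()
```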

fastchat/serve/gradio_block_arena_named.py

Lines changed: 9 additions & 3 deletions
@@ -50,7 +50,7 @@ def set_global_vars_named(enable_moderation_, use_remote_storage_):
 
 
 def load_demo_side_by_side_named(models, url_params):
-    states = (None,) * num_sides
+    states = [None] * num_sides
 
     model_left = models[0] if len(models) > 0 else ""
     if len(models) > 1:
@@ -60,10 +60,10 @@ def load_demo_side_by_side_named(models, url_params):
     else:
         model_right = model_left
 
-    selector_updates = (
+    selector_updates = [
         gr.Dropdown(choices=models, value=model_left, visible=True),
         gr.Dropdown(choices=models, value=model_right, visible=True),
-    )
+    ]
 
     return states + selector_updates
 
@@ -409,6 +409,12 @@ def build_side_by_side_ui_named(models):
         elem_id=f"chatbot",
         height=650,
         show_copy_button=True,
+        latex_delimiters=[
+            {"left": "$", "right": "$", "display": False},
+            {"left": "$$", "right": "$$", "display": True},
+            {"left": r"\(", "right": r"\)", "display": False},
+            {"left": r"\[", "right": r"\]", "display": True},
+        ],
     )
 
     with gr.Row():

fastchat/serve/gradio_block_arena_vision.py

Lines changed: 6 additions & 0 deletions
@@ -346,6 +346,12 @@ def build_single_vision_language_model_ui(
         label="Scroll down and start chatting",
         height=650,
         show_copy_button=True,
+        latex_delimiters=[
+            {"left": "$", "right": "$", "display": False},
+            {"left": "$$", "right": "$$", "display": True},
+            {"left": r"\(", "right": r"\)", "display": False},
+            {"left": r"\[", "right": r"\]", "display": True},
+        ],
     )
 
     with gr.Row():

fastchat/serve/gradio_block_arena_vision_anony.py

Lines changed: 6 additions & 0 deletions
@@ -474,6 +474,12 @@ def build_side_by_side_vision_ui_anony(context: Context, random_questions=None):
         elem_id="chatbot",
         height=650,
         show_copy_button=True,
+        latex_delimiters=[
+            {"left": "$", "right": "$", "display": False},
+            {"left": "$$", "right": "$$", "display": True},
+            {"left": r"\(", "right": r"\)", "display": False},
+            {"left": r"\[", "right": r"\]", "display": True},
+        ],
     )
 
     with gr.Row():

fastchat/serve/gradio_block_arena_vision_named.py

Lines changed: 6 additions & 0 deletions
@@ -409,6 +409,12 @@ def build_side_by_side_vision_ui_named(context: Context, random_questions=None):
         elem_id=f"chatbot",
         height=650,
         show_copy_button=True,
+        latex_delimiters=[
+            {"left": "$", "right": "$", "display": False},
+            {"left": "$$", "right": "$$", "display": True},
+            {"left": r"\(", "right": r"\)", "display": False},
+            {"left": r"\[", "right": r"\]", "display": True},
+        ],
     )
 
     with gr.Row():

fastchat/serve/gradio_web_server.py

Lines changed: 6 additions & 0 deletions
@@ -942,6 +942,12 @@ def build_single_model_ui(models, add_promotion_links=False):
         label="Scroll down and start chatting",
         height=650,
         show_copy_button=True,
+        latex_delimiters=[
+            {"left": "$", "right": "$", "display": False},
+            {"left": "$$", "right": "$$", "display": True},
+            {"left": r"\(", "right": r"\)", "display": False},
+            {"left": r"\[", "right": r"\]", "display": True},
+        ],
     )
     with gr.Row():
         textbox = gr.Textbox(
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+## Download dataset
+We have pre-generated several category classifier benchmarks and ground truths. You can download them (with [`git-lfs`](https://git-lfs.com) installed) to the directory `classify/` by running
+```console
+> git clone https://huggingface.co/datasets/lmarena-ai/categories-benchmark-eval
+# cd into classify/, then copy the label_bench directory to the current directory
+> cp -r categories-benchmark-eval/label_bench .
+```
+Your label_bench directory should follow this structure:
+```markdown
+├── label_bench/
+│   ├── creative_writing_bench/
+│   │   ├── data/
+│   │   │   └── llama-v3p1-70b-instruct.json
+│   │   └── test.json
+│   ├── ...
+│   ├── your_bench_name/
+│   │   ├── data/
+│   │   │   ├── your_classifier_data_1.json
+│   │   │   ├── your_classifier_data_2.json
+│   │   │   └── ...
+│   │   └── test.json (your ground truth)
+└── ...
+```
+
+## How to evaluate your category classifier?
+
+To test a classifier for a new category, first make sure you have created the category child class in `category.py`. Then, to generate classification labels, make the necessary edits in `config.yaml` and run
+```console
+python label.py --config config.yaml --testing
+```
+
+Next, add your new category bench to `tag_names` in `display_score.py`. After making sure that you also have a correctly formatted ground-truth JSON file, you can report the performance of your classifier by running
+```console
+python display_score.py --bench <your_bench>
+```
+
+To inspect conflicts between your classifier's labels and the ground truth, use
+```console
+python display_score.py --bench <your_bench> --display-conflict
+```
+
+Example output:
+```console
+> python display_score.py --bench if_bench --display-conflict
+Model: gpt-4o-mini-2024-07-18
+Accuracy: 0.967
+Precision: 0.684
+Recall: 0.918
+
+###### CONFLICT ######
+
+Ground Truth = True; Pred = False
+####################
+...
+
+Ground Truth = False; Pred = True
+####################
+...
+```
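For intuition about what `display_score.py` reports, here is a minimal sketch of the same bookkeeping: compare predicted binary labels against the ground truth, print accuracy, precision, and recall, then list conflicting examples. The implementation of `display_score.py` is not shown in this diff, and the JSON layout assumed below (a mapping from question id to a boolean label) is an illustration, not the repository's actual schema.

```python
import json

def score(pred_path: str, truth_path: str) -> None:
    # assumed layout: {"<question_id>": true/false, ...} in both files;
    # the repository's actual schema may differ
    with open(pred_path) as f:
        preds = json.load(f)
    with open(truth_path) as f:
        truth = json.load(f)
    ids = sorted(set(preds) & set(truth))

    tp = sum(preds[i] and truth[i] for i in ids)
    fp = sum(preds[i] and not truth[i] for i in ids)
    fn = sum(not preds[i] and truth[i] for i in ids)
    tn = len(ids) - tp - fp - fn

    print(f"Accuracy:  {(tp + tn) / max(len(ids), 1):.3f}")
    print(f"Precision: {tp / max(tp + fp, 1):.3f}")
    print(f"Recall:    {tp / max(tp + fn, 1):.3f}")

    # conflicts, grouped the way the example output above groups them
    for i in ids:
        if truth[i] and not preds[i]:
            print(f"Ground Truth = True; Pred = False: {i}")
    for i in ids:
        if preds[i] and not truth[i]:
            print(f"Ground Truth = False; Pred = True: {i}")

# hypothetical paths following the directory layout above
score("label_bench/if_bench/data/my_classifier.json",
      "label_bench/if_bench/test.json")
```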
