Skip to content

Commit 4a6024c

Browse files
authored
Update monitor_md.py
1 parent e59fd55 commit 4a6024c

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

fastchat/serve/monitor/monitor_md.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,12 @@
1919
"math": "Math",
2020
"if": "Instruction Following",
2121
"multiturn": "Multi-Turn",
22+
"creative_writing": "Creative Writing",
2223
"coding": "Coding",
2324
"coding_style_control": "Coding w/ Style Control",
24-
"hard_6": "Hard Prompts (Overall)",
25+
"hard_6": "Hard Prompts",
2526
"hard_english_6": "Hard Prompts (English)",
26-
"hard_6_style_control": "Hard Prompts (Overall) w/ Style Control",
27+
"hard_6_style_control": "Hard Prompts w/ Style Control",
2728
"long_user": "Longer Query",
2829
"english": "English",
2930
"chinese": "Chinese",
@@ -48,8 +49,8 @@
4849
"Multi-Turn": "Multi-Turn Conversation (>= 2 turns)",
4950
"Coding": "Coding: whether conversation contains code snippets",
5051
"Coding w/ Style Control": "Coding with Style Control",
51-
"Hard Prompts (Overall)": "Hard Prompts (Overall): details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/)",
52-
"Hard Prompts (Overall) w/ Style Control": "Hard Prompts with Style Control. See details in [blog post](https://lmsys.org/blog/2024-08-28-style-control/).",
52+
"Hard Prompts": "Hard Prompts: details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/)",
53+
"Hard Prompts w/ Style Control": "Hard Prompts with Style Control. See details in [blog post](https://lmsys.org/blog/2024-08-28-style-control/).",
5354
"Hard Prompts (English)": "Hard Prompts (English), note: the delta is to English Category. details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/)",
5455
"Longer Query": "Longer Query (>= 500 tokens)",
5556
"English": "English Prompts",
@@ -65,6 +66,7 @@
6566
"Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
6667
"overall_limit_5_user_vote": "overall_limit_5_user_vote",
6768
"Overall (Deprecated)": "Overall without De-duplicating Top Redundant Queries (top 0.1%). See details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/#note-enhancing-quality-through-de-duplication).",
69+
"Creative Writing": "Creative Writing",
6870
}
6971
cat_name_to_baseline = {
7072
"Hard Prompts (English)": "English",
@@ -82,7 +84,7 @@ def make_default_md_1(mirror=False):
8284
link_color = "#1976D2" # This color should be clear in both light and dark mode
8385
leaderboard_md = f"""
8486
# 🏆 Chatbot Arena LLM Leaderboard: Community-driven Evaluation for Best LLM and AI chatbots
85-
[Blog](https://blog.lmarena.ai/blog/2023/arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2403.04132) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/6GXcFg3TH8) | [Kaggle Competition](https://www.kaggle.com/competitions/lmsys-chatbot-arena)
87+
[Twitter](https://twitter.com/lmarena_ai) | [Discord](https://discord.gg/6GXcFg3TH8) | [Blog](https://blog.lmarena.ai/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2403.04132) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Kaggle Competition](https://www.kaggle.com/competitions/wsdm-cup-multilingual-chatbot-arena)
8688
"""
8789

8890
return leaderboard_md

0 commit comments

Comments (0)