Skip to content

Commit a71e3c6

Browse files
committed
update
1 parent e5dc446 commit a71e3c6

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

fastchat/serve/monitor/monitor.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ def make_category_arena_leaderboard_md(arena_df, arena_subset_df, name="Overall"
8787

8888
def make_full_leaderboard_md():
8989
leaderboard_md = """
90-
Three benchmarks are displayed: **Arena Elo**, **MT-Bench** and **MMLU**.
90+
Three benchmarks are displayed: **Arena Score**, **MT-Bench** and **MMLU**.
9191
- [Chatbot Arena](https://chat.lmsys.org/?arena) - a crowdsourced, randomized battle platform. We use 500K+ user votes to compute model strength.
9292
- [MT-Bench](https://arxiv.org/abs/2306.05685): a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
9393
- [MMLU](https://arxiv.org/abs/2009.03300) (5-shot): a test to measure a model's multitask accuracy on 57 tasks.
@@ -350,7 +350,7 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
350350
# model display name
351351
row.append(model_name)
352352
# elo rating
353-
rating = f"{round(arena_df.iloc[i]['rating'])}"
353+
rating = round(arena_df.iloc[i]['rating'])
354354
row.append(rating)
355355
upper_diff = round(
356356
arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"]
@@ -440,7 +440,7 @@ def update_leaderboard_df(arena_table_vals):
440440
"Rank* (UB)",
441441
"Delta",
442442
"Model",
443-
"Arena Elo",
443+
"Arena Score",
444444
"95% CI",
445445
"Votes",
446446
"Organization",
@@ -558,7 +558,7 @@ def update_leaderboard_and_plots(category):
558558
"Knowledge Cutoff",
559559
],
560560
datatype=[
561-
"str",
561+
"number",
562562
"markdown",
563563
"number",
564564
"str",
@@ -629,15 +629,15 @@ def update_leaderboard_and_plots(category):
629629
headers=[
630630
"Rank* (UB)",
631631
"🤖 Model",
632-
"⭐ Arena Elo",
632+
"⭐ Arena Score",
633633
"📊 95% CI",
634634
"🗳️ Votes",
635635
"Organization",
636636
"License",
637637
"Knowledge Cutoff",
638638
],
639639
datatype=[
640-
"str",
640+
"number",
641641
"markdown",
642642
"number",
643643
"str",
@@ -724,7 +724,7 @@ def build_full_leaderboard_tab(elo_results, model_table_df):
724724
gr.Dataframe(
725725
headers=[
726726
"Model",
727-
"Arena Elo",
727+
"Arena Score",
728728
"MT-bench",
729729
"MMLU",
730730
"Organization",

0 commit comments

Comments
 (0)