Skip to content

Commit 827aaba

Browse files
authored
added mirroring to hf leaderboard (#3287)
1 parent 1f60e7f commit 827aaba

File tree

1 file changed

+16
-9
lines changed

1 file changed

+16
-9
lines changed

fastchat/serve/monitor/monitor.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,18 @@
3232
leader_component_values = [None] * 5
3333

3434

35-
def make_default_md(arena_df, elo_results):
35+
def make_default_md(arena_df, elo_results, mirror=False):
36+
mirror_str = "<span style='color: red; font-weight: bold;'>This is a mirror of the live leaderboard created and maintained by the [LMSYS Organization](https://lmsys.org).</span>"
3637
leaderboard_md = f"""
37-
# 🏆 LMSYS Chatbot Arena Leaderboard
38-
| [Vote](https://chat.lmsys.org) | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2403.04132) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
38+
# 🏆 LMSYS Chatbot Arena Leaderboard
39+
| [Website](https://lmsys.org) | [Vote](https://chat.lmsys.org) | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2403.04132) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
40+
41+
{mirror_str if mirror else ""}
42+
43+
LMSYS [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) is a crowdsourced open platform for LLM evals. We've collected over **800,000** human pairwise comparisons to rank LLMs with the [Bradley-Terry model](https://en.wikipedia.org/wiki/Bradley%E2%80%93Terry_model) and display the model ratings in Elo-scale.
44+
You can find more details in our [paper](https://arxiv.org/abs/2403.04132).
45+
"""
3946

40-
LMSYS [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) is a crowdsourced open platform for LLM evals.
41-
We've collected over **800,000** human pairwise comparisons to rank LLMs with the [Bradley-Terry model](https://en.wikipedia.org/wiki/Bradley%E2%80%93Terry_model) and display the model ratings in Elo-scale.
42-
You can find more details in our [paper](https://arxiv.org/abs/2403.04132).
43-
"""
4447
return leaderboard_md
4548

4649

@@ -389,7 +392,9 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
389392
}
390393

391394

392-
def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
395+
def build_leaderboard_tab(
396+
elo_results_file, leaderboard_table_file, show_plot=False, mirror=False
397+
):
393398
arena_dfs = {}
394399
category_elo_results = {}
395400
if elo_results_file is None: # Do live update
@@ -412,7 +417,9 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
412417
p3 = category_elo_results["Overall"]["bootstrap_elo_rating"]
413418
p4 = category_elo_results["Overall"]["average_win_rate_bar"]
414419
arena_df = arena_dfs["Overall"]
415-
default_md = make_default_md(arena_df, category_elo_results["Overall"])
420+
default_md = make_default_md(
421+
arena_df, category_elo_results["Overall"], mirror=mirror
422+
)
416423

417424
md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")
418425
if leaderboard_table_file:

0 commit comments

Comments (0)