
Commit 6f827c1

chore: Description added
1 parent 4996326 commit 6f827c1

3 files changed: +12, −4 lines


agents_mcp_usage/multi_mcp/eval_multi_mcp/dashboard_config.py
Lines changed: 9 additions & 2 deletions

@@ -19,8 +19,15 @@

 MERBENCH_CONFIG = {
     # --- General Dashboard Settings ---
-    "title": "Merbench - LLM Evaluation Benchmark",
-    "icon": "🏆",  # Emoji for the browser tab
+    "title": "🧜‍♀️ Merbench - LLM Evaluation ",
+    "description": (
+        "Getting LLMs to consistently nail the mermaid diagram syntax can be... an adventure. "
+        "\n\nMerbench tests this ability by providing an LLM Agent access to an MCP server that both validates "
+        "and provides error messages to guide correction of syntax. There are three different difficulty levels (test cases), "
+        "and the LLM is given a fixed number of attempts to fix the diagram, if this is exceeded, the test case is considered failed. "
+        "\n\nThis leaderboard shows the average success rate across all selected models and difficulty levels."
+    ),
+    "icon": "🧜‍♀️",  # Emoji for the browser tab
     # --- Primary Metric Configuration ---
     # The primary metric is the main score used for the leaderboard and
     # the y-axis of the Pareto frontier plot.

agents_mcp_usage/multi_mcp/eval_multi_mcp/merbench_ui.py
Lines changed: 2 additions & 2 deletions

@@ -653,7 +653,7 @@ def main() -> None:
     eval_config = EVAL_CONFIG  # Use the validated config

     st.title(eval_config.title)
-    st.subheader("LLM Evaluation Benchmark Dashboard")
+    st.markdown(eval_config.description)

     # --- Sidebar Setup ---
     st.sidebar.header("⚙️ Data Configuration")
@@ -817,7 +817,7 @@ def main() -> None:
     else:
         st.warning("No data available for the current filter selection.")

-    st.header("📈 Pareto Frontier Analysis")
+    st.header("📈 Pareto Frontier")
     pareto_config = eval_config.plots.pareto
     x_axis_mode = st.radio(
         "Compare performance against:",

agents_mcp_usage/multi_mcp/eval_multi_mcp/schemas.py
Lines changed: 1 addition & 0 deletions

@@ -93,6 +93,7 @@ class CostCalculationConfig(BaseModel):

 class DashboardConfig(BaseModel):
     title: str
+    description: str
     icon: str
     primary_metric: PrimaryMetricConfig
     grouping: GroupingConfig
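
Taken together, the three hunks turn the dashboard description into a validated config value instead of a hard-coded subheader: dashboard_config.py supplies the text, schemas.py requires it on DashboardConfig, and merbench_ui.py renders it. A minimal sketch of that flow, assuming a trimmed-down DashboardConfig (the real model also requires primary_metric, grouping, and other fields) and an abbreviated MERBENCH_CONFIG:

from pydantic import BaseModel, ValidationError


# Trimmed-down stand-in for schemas.DashboardConfig; the real model carries
# additional required fields (primary_metric, grouping, plots, ...).
class DashboardConfig(BaseModel):
    title: str
    description: str  # new required field introduced by this commit
    icon: str


# Abbreviated stand-in for dashboard_config.MERBENCH_CONFIG.
MERBENCH_CONFIG = {
    "title": "🧜‍♀️ Merbench - LLM Evaluation ",
    "description": "Merbench tests an LLM agent's ability to fix Mermaid syntax...",
    "icon": "🧜‍♀️",
}

try:
    eval_config = DashboardConfig(**MERBENCH_CONFIG)
except ValidationError as exc:
    # Configs written before this commit now fail validation, because
    # "description" is a required field with no default.
    raise SystemExit(f"Invalid dashboard config: {exc}")

# merbench_ui.py then renders the validated text where the static
# subheader used to be:
#   st.title(eval_config.title)
#   st.markdown(eval_config.description)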
