chore: Description added

andrewginns · andrewginns · commit 6f827c15decd · 2025-06-11T08:54:46.000Z
diff --git a/agents_mcp_usage/multi_mcp/eval_multi_mcp/dashboard_config.py b/agents_mcp_usage/multi_mcp/eval_multi_mcp/dashboard_config.py
@@ -19,8 +19,15 @@
 
 MERBENCH_CONFIG = {
     # --- General Dashboard Settings ---
-    "title": "Merbench - LLM Evaluation Benchmark",
-    "icon": "🏆",  # Emoji for the browser tab
+    "title": "🧜‍♀️ Merbench - LLM Evaluation ",
+    "description": (
+        "Getting LLMs to consistently nail the mermaid diagram syntax can be... an adventure. "
+        "\n\nMerbench tests this ability by providing an LLM Agent access to an MCP server that both validates "
+        "and provides error messages to guide correction of syntax. There are three different difficulty levels (test cases), "
+        "and the LLM is given a fixed number of attempts to fix the diagram; if this is exceeded, the test case is considered failed. "
+        "\n\nThis leaderboard shows the average success rate across all selected models and difficulty levels."
+    ),
+    "icon": "🧜‍♀️",  # Emoji for the browser tab
     # --- Primary Metric Configuration ---
     # The primary metric is the main score used for the leaderboard and
     # the y-axis of the Pareto frontier plot.
diff --git a/agents_mcp_usage/multi_mcp/eval_multi_mcp/merbench_ui.py b/agents_mcp_usage/multi_mcp/eval_multi_mcp/merbench_ui.py
@@ -653,7 +653,7 @@ def main() -> None:
     eval_config = EVAL_CONFIG  # Use the validated config
 
     st.title(eval_config.title)
-    st.subheader("LLM Evaluation Benchmark Dashboard")
+    st.markdown(eval_config.description)
 
     # --- Sidebar Setup ---
     st.sidebar.header("⚙️ Data Configuration")
@@ -817,7 +817,7 @@ def main() -> None:
     else:
         st.warning("No data available for the current filter selection.")
 
-    st.header("📈 Pareto Frontier Analysis")
+    st.header("📈 Pareto Frontier")
     pareto_config = eval_config.plots.pareto
     x_axis_mode = st.radio(
         "Compare performance against:",
diff --git a/agents_mcp_usage/multi_mcp/eval_multi_mcp/schemas.py b/agents_mcp_usage/multi_mcp/eval_multi_mcp/schemas.py
@@ -93,6 +93,7 @@ class CostCalculationConfig(BaseModel):
 
 class DashboardConfig(BaseModel):
     title: str
+    description: str
     icon: str
     primary_metric: PrimaryMetricConfig
     grouping: GroupingConfig