mlcommons · mhmdk0 · Jan 1, 2026 · Jan 24, 2026 · Jan 24, 2026 · Jan 24, 2026
@@ -92,6 +92,7 @@
 cas_folder = "cas"
 training_events_folder = "training_events"
 certificates_folder = "certificates"
+dashboards_folder = "dashboards"
 
 default_base_storage = str(Path.home().resolve() / ".medperf")
 
@@ -164,6 +165,10 @@
         "base": default_base_storage,
         "name": certificates_folder,
     },
+    "dashboards_folder": {
+        "base": default_base_storage,
+        "name": dashboards_folder,
+    },
 }
 
 root_folders = [
@@ -186,6 +191,7 @@
     "cas_folder",
     "training_events_folder",
     "certificates_folder",
+    "dashboards_folder",
 ]
 
 # MedPerf filenames conventions

@@ -0,0 +1 @@
+"""Dashboard package for preparation monitoring."""
@@ -2,4 +2,4 @@
 
 initialize()
 
-from .preparation_dashboard import t_app # noqa
+from .preparation_dashboard import t_app # noqa
@@ -3,14 +3,17 @@
 import datetime
 import numpy as np
 
+from medperf.entities.benchmark import Benchmark
 from medperf.entities.dataset import Dataset
 from medperf import config
 
-from .utils import get_institution_from_email, get_reports_path, stage_id2name
+from .utils import get_institution_from_email, stage_id2name
 
 
-def get_dsets(mlcube_id):
-    dsets = Dataset.all(filters={"mlcube": mlcube_id})
+def get_dsets(benchmark_id):
+    bmk = Benchmark.get(benchmark_id)
+    data_preparator = bmk.data_preparation_mlcube
+    dsets = Dataset.all(filters={"data_preparation_mlcube": data_preparator})
     dsets = [dset.todict() for dset in dsets]
     for dset in dsets:
         user_id = dset["owner"]
@@ -82,20 +85,23 @@
    registered_sites = dsets_df["institution"].values.tolist()
    sites = list(set(expected_sites + registered_sites))

    with open(sites_path, "w") as f:
         f.write("\n".join(sites))
 
 
-def get_data(mlcube_id, stages_path, institutions_path, out_path):
-    dsets = get_dsets(mlcube_id)
-    full_path = get_reports_path(out_path, mlcube_id)
-    os.makedirs(full_path, exist_ok=True)
+def get_data(benchmark_id, stages_path, institutions_path, out_path):
+    """Collect dataset preparation data for a benchmark and write it to disk.
+
+    Args:
+        benchmark_id: ID of the benchmark whose datasets are looked up
+            through ``get_dsets``.
+        stages_path: Path to the stages CSV; it is indexed by its
+            ``"Status Code"`` column.
+        institutions_path: Path to the institutions CSV; each row is read
+            as an ``(institution, user)`` pair.
+        out_path: Directory that receives the generated files. It is
+            created if missing.
+
+    Returns:
+        bool: ``True`` when the report files were written, ``False`` when
+        ``get_dsets`` returned nothing and no files were produced.
+    """
+    dsets = get_dsets(benchmark_id)
+    if not dsets:
+        # Explicit False (instead of an implicit None) keeps the return
+        # type consistent with the success path below.
+        return False
+
+    os.makedirs(out_path, exist_ok=True)
 
     institutions_df = pd.read_csv(institutions_path)
     user2institution = {u: i for i, u in institutions_df.values.tolist()}
     stages_df = pd.read_csv(stages_path)
     stages_df.set_index("Status Code", inplace=True)
 
     dsets_df = build_dset_df(dsets, user2institution, stages_df)
-    write_dsets_df(dsets_df, full_path)
-    write_sites(dsets_df, institutions_df, full_path)
+    write_dsets_df(dsets_df, out_path)
+    write_sites(dsets_df, institutions_df, out_path)
+    return True
@@ -5,6 +5,8 @@
 import dash_bootstrap_components as dbc
 import pandas as pd
 
+from medperf import config
+
 from .get_data import get_data
 from .utils import get_reports_path
 
@@ -180,7 +182,7 @@
    all_stages_list = stages_df["status_name"].values.tolist()
    all_stages = set(all_stages_list)

    reports = os.listdir(full_path)
    timeseries_reports = list(
        set(reports) - set(["full_table.csv", "latest_table.csv", "sites.txt"])
    )
@@ -231,8 +233,29 @@
     )
 
 
+def no_data_layout():
+    """Build the placeholder page used when there is no data to display.
+
+    Returns:
+        A ``dbc.Container`` holding the page title and a warning alert
+        saying that no datasets are registered with the benchmark's data
+        preparator yet.
+    """
+    title = html.H1("Preparation Progress", style={"textAlign": "center"})
+
+    alert_heading = html.H3("No registered datasets", className="alert-heading")
+    alert_text = html.P(
+        "There are no datasets registered with the data preparator "
+        "of this benchmark yet.",
+        className="fs-5",
+    )
+    alert = dbc.Alert(
+        [alert_heading, alert_text],
+        color="warning",
+        className="mt-4 text-center",
+    )
+
+    return dbc.Container([title, alert], className="mt-4")
+
+
 def get_sites_dicts(sites_path, latest_table):
     with open(sites_path, "r") as f:
         sites = f.readlines()
    sites = [site.strip() for site in sites]
    sites += latest_table["institution"].values.tolist()
@@ -243,9 +266,20 @@
     return sites_dicts
 
 
-def build_dash_app(registered_df, stages_colors, latest_table, stages, full_path):
+def _build_dash_app(
+    data_exists,
+    registered_df,
+    stages_colors,
+    latest_table,
+    stages,
+    full_path,
+    prefix,
+):
+
     app = Dash(
         __name__,
+        title="Preparation Dashboard",
+        requests_pathname_prefix=prefix,
         external_stylesheets=[dbc.themes.LUMEN],
         meta_tags=[
             {
@@ -255,6 +289,10 @@
         ],
     )
 
+    if not data_exists:
+        app.layout = no_data_layout()
+        return app
+
     app.layout = dbc.Container(
         [
             html.H1(children="Preparation Progress", style={"textAlign": "center"}),
@@ -267,10 +305,54 @@
     return app
 
 
+def build_app(
+    benchmark_id,
+    stages_path,
+    institutions_path,
+    out_path=None,
+    prefix=None,
+):
+    """Gather preparation data for a benchmark and build the Dash app.
+
+    Args:
+        benchmark_id: ID of the benchmark to inspect.
+        stages_path: Path to the stages CSV (must provide the
+            ``status_name`` and ``color`` columns read below).
+        institutions_path: Path to the institutions CSV, forwarded to
+            ``get_data``.
+        out_path: Base directory for report files; falls back to
+            ``config.dashboards_folder`` when falsy.
+        prefix: Forwarded unchanged to ``_build_dash_app``.
+
+    Returns:
+        The app returned by ``_build_dash_app``. When ``get_data`` reports
+        that nothing was written, the data arguments are passed as ``None``.
+    """
+    base_path = out_path or config.dashboards_folder
+    full_path = get_reports_path(base_path, benchmark_id)
+
+    data_exists = get_data(benchmark_id, stages_path, institutions_path, full_path)
+    if not data_exists:
+        # No report files exist, so there is nothing to load from disk.
+        return _build_dash_app(
+            data_exists, None, None, None, None, full_path, prefix
+        )
+
+    latest_table = pd.read_csv(os.path.join(full_path, "latest_table.csv"))
+
+    sites_dicts = get_sites_dicts(os.path.join(full_path, "sites.txt"), latest_table)
+    registered_df = pd.DataFrame(sites_dicts).drop_duplicates()
+
+    stages = pd.read_csv(stages_path)
+    color_by_status = stages[["status_name", "color"]].set_index("status_name")
+    stages_colors = color_by_status.to_dict()["color"]
+    stages_colors["Unknown"] = "silver"
+
+    return _build_dash_app(
+        data_exists,
+        registered_df,
+        stages_colors,
+        latest_table,
+        stages,
+        full_path,
+        prefix,
+    )
+
+
 @t_app.command()
 def main(
-    mlcube_id: int = Option(
-        ..., "-m", "--mlcube", help="MLCube ID to inspect prparation from"
+    benchmark_id: int = Option(
+        ..., "-b", "--benchmark", help="Benchmark ID to inspect preparation from"
     ),
     stages_path: str = Option(..., "-s", "--stages", help="Path to stages.csv"),
     institutions_path: str = Option(
@@ -283,29 +365,7 @@
         None, "-o", "--out-path", help="location to store progress CSVs"
     ),
 ):
-    cur_path = os.path.dirname(__file__)
-    if out_path is None:
-        out_path = os.path.join(cur_path, "reports")
-
-    get_data(mlcube_id, stages_path, institutions_path, out_path)
-    full_path = get_reports_path(out_path, mlcube_id)
-
-    latest_path = os.path.join(full_path, "latest_table.csv")
-    latest_table = pd.read_csv(latest_path)
-
-    sites_path = os.path.join(full_path, "sites.txt")
-    sites_dicts = get_sites_dicts(sites_path, latest_table)
-
-    registered_df = pd.DataFrame(sites_dicts)
-    registered_df = registered_df.drop_duplicates()
-
-    stages = pd.read_csv(stages_path)
-    stages_colors = (
-        stages[["status_name", "color"]].set_index("status_name").to_dict()["color"]
-    )
-    stages_colors["Unknown"] = "silver"
-
-    app = build_dash_app(registered_df, stages_colors, latest_table, stages, full_path)
+    app = build_app(benchmark_id, stages_path, institutions_path, out_path)
     app.run_server(debug=True)
 
 

@@ -1,8 +1,5 @@
-import re
 import os
 
-from medperf import config
-
 
 def stage_id2name(stage_str, stages_df):
     _, code = stage_str.split()
@@ -23,8 +20,6 @@ def get_institution_from_email(email, user2institution):
         return plausible_institution
 
 
-def get_reports_path(out_path, mlcube_id):
-    server_path = config.server.split("//")[1]
-    server_path = re.sub(r"[.:]", "_", server_path)
-    full_path = os.path.join(out_path, server_path, str(mlcube_id))
+def get_reports_path(out_path, benchmark_id):
+    """Return the report directory for a benchmark: ``out_path/<benchmark_id>``."""
+    full_path = os.path.join(out_path, str(benchmark_id))
     return full_path
@@ -53,6 +53,9 @@ def startup_event():
     web_app.state.task_running = False
     web_app.state.MAXLOGMESSAGES = config.webui_max_log_messages
 
+    # Maps benchmark_id -> dict of dashboard state
+    # (used to check whether it is mounted and whether its files changed)
+    web_app.state.dashboards = {}
+
     # List of [schemas.Notification] will appear in the notifications tab
     web_app.state.notifications = []
 

@@ -30,6 +30,8 @@
     UpdateAssociationsPolicy,
 )
 
+from medperf.web_ui.utils import mount_dashboard
+
 router = APIRouter()
 logger = logging.getLogger(__name__)
 
@@ -391,3 +393,51 @@ def update_associations_policy(
         url=f"/benchmarks/ui/display/{benchmark_id}",
     )
     return return_response
+
+
+@router.post("/ui/dashboard", response_class=HTMLResponse)
+def preparation_dashboard(
+    request: Request,
+    benchmark_id: int = Form(...),
+    benchmark_name: str = Form(...),
+    stages: str = Form(...),
+    institutions: str = Form(...),
+    force_update: bool = Form(False),
+    current_user: bool = Depends(check_user_ui),
+):
+    """Mount the preparation dashboard of a benchmark and render its page.
+
+    Only the benchmark owner may open the dashboard. Any failure (benchmark
+    lookup, ownership check, or mounting) is logged where applicable and
+    shown through the ``error.html`` template rather than propagating.
+
+    Args:
+        request: Incoming request, passed to the templates and to
+            ``mount_dashboard``.
+        benchmark_id: ID of the benchmark whose dashboard is requested.
+        benchmark_name: Name shown by the wrapper template.
+        stages: Stages file path, forwarded to ``mount_dashboard``.
+        institutions: Institutions file path, forwarded to
+            ``mount_dashboard``.
+        force_update: Forwarded to ``mount_dashboard``.
+        current_user: Result of the ``check_user_ui`` dependency.
+
+    Returns:
+        The rendered ``dashboard_wrapper.html`` on success, otherwise the
+        rendered ``error.html``.
+    """
+    error_prefix = "Failed to load dashboard: "
+    failure = None
+
+    try:
+        # The lookup and ownership check live inside the try block so a
+        # failing Benchmark.get renders the error page like mount failures.
+        benchmark = Benchmark.get(benchmark_id)
+        if benchmark.owner != get_medperf_user_data()["id"]:
+            failure = "Only the benchmark owner can access the dashboard."
+        else:
+            mount_dashboard(request, benchmark_id, stages, institutions, force_update)
+    except Exception as exp:
+        logger.exception(exp)
+        failure = str(exp)
+
+    # Compare against None: an exception with an empty message is still a failure.
+    if failure is not None:
+        return templates.TemplateResponse(
+            "error.html",
+            {
+                "request": request,
+                "exception": error_prefix + failure,
+            },
+        )
+
+    return templates.TemplateResponse(
+        "dashboard_wrapper.html",
+        {
+            "request": request,
+            "mount_point": f"/ui/display/{benchmark_id}/dashboard/app",
+            "benchmark_id": benchmark_id,
+            "prev_url": f"/benchmarks/ui/display/{benchmark_id}/",
+            "benchmark_name": benchmark_name,
+        },
+    )
@@ -244,4 +244,51 @@ $(document).ready(() => {
 
     $("#dataset-auto-approve-mode").trigger("change");
     $("#model-auto-approve-mode").trigger("change");
+
+    // Dashboard controls: a toggle button and a collapsible form wrapper.
+    const btn  = $("#dashboard-btn");
+    const form = $("#dashboard-form-wrapper");
+
+    // Nothing to wire up when either element is absent from the page.
+    if(!btn.length || !form.length)
+        return;
+
+    // toggle: false -> create the Collapse instance without toggling the
+    // element on construction; it is shown/hidden explicitly below.
+    const collapse = new bootstrap.Collapse(form[0], {
+        toggle: false
+    });
+
+    // Validate both path inputs before letting the form go through.
+    // NOTE(review): "dashobard" looks like a typo of "dashboard"; the id must
+    // match the template markup, so rename both together if it is fixed.
+    $("#redirect-dashobard-form").off("submit").on("submit", (e) => {
+        e.preventDefault();
+
+        if (!$("#stages-path").val()) {
+            showErrorToast("Make sure to enter a valid path for the stages file");
+            return;
+        }
+
+        if (!$("#institutions-path").val()) {
+            showErrorToast("Make sure to enter a valid path for the institutions file");
+            return;
+        }
+
+        // Native submit(): sends the form without firing this handler again.
+        e.currentTarget.submit();
+    });
+
+    // Browse buttons: set the browseWithFiles flag (declared outside this
+    // hunk; presumably switches the browser to file selection, TODO confirm),
+    // then open the browser targeting the matching input.
+    $("#browse-stages-btn").on("click", () => {
+        browseWithFiles = true;
+        browseFolderHandler("stages-path");
+    });
+    $("#browse-institutions-btn").on("click", () => {
+        browseWithFiles = true;
+        browseFolderHandler("institutions-path");
+    });
+
+    // Toggle the form and rotate the button's icon to reflect the state.
+    btn.on("click", function () {
+        const icon = $(this).find("i");
+
+        if (form.hasClass("show")) {
+            collapse.hide();
+            icon.css("transform", "rotate(0deg)");
+        } else {
+            collapse.show();
+            icon.css("transform", "rotate(180deg)");
+        }
+    });
 });
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		"""Dashboard package for preparation monitoring."""
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2,4 +2,4 @@

		initialize()

		from .preparation_dashboard import t_app # noqa
		from .preparation_dashboard import t_app # noqa