microsoft
diff --git a/‎rdagent/log/ui/ds_trace.py
Lines changed: 84 additions & 53 deletions b/‎rdagent/log/ui/ds_trace.py
Lines changed: 84 additions & 53 deletions
@@ -19,7 +19,8 @@
 from rdagent.log.ui.utils import (
     curve_figure,
     get_sota_exp_stat,
-    load_times,
+    load_times_info,
+    timeline_figure,
     trace_figure,
 )
 from rdagent.log.utils import (
@@ -170,9 +171,11 @@ def load_stdout(stdout_path: Path):
 
 # UI windows
 def task_win(task):
-    with st.container(border=True):
-        st.markdown(f"**:violet[{task.name}]**")
+    with st.expander(f"**:violet[{task.name}]**", expanded=False):
         st.markdown(task.description)
+        if hasattr(task, "package_info"):
+            st.markdown(f"**:blue[Package Info:]**")
+            st.code(task.package_info)
         if hasattr(task, "architecture"):  # model task
             st.markdown(
                 f"""
@@ -185,14 +188,17 @@ def task_win(task):
 
 def workspace_win(workspace, cmp_workspace=None, cmp_name="last code."):
     show_files = {k: v for k, v in workspace.file_dict.items() if "test" not in k}
-
     if len(show_files) > 0:
         if cmp_workspace:
             diff = generate_diff_from_dict(cmp_workspace.file_dict, show_files, "main.py")
             with st.popover(f":violet[**Diff with {cmp_name}**]", use_container_width=True, icon="🔍"):
                 st.code("".join(diff), language="diff", wrap_lines=True, line_numbers=True)
+
+        rtime = workspace.running_info.running_time
+        time_str = timedelta_to_str(timedelta(seconds=rtime) if rtime else None) or "00:00:00"
+
         with st.popover(
-            f"Files in :blue[{replace_ep_path(workspace.workspace_path)}]", use_container_width=True, icon="📂"
+            f"⏱️{time_str} 📂Files in :blue[{replace_ep_path(workspace.workspace_path)}]", use_container_width=True
         ):
             code_tabs = st.tabs(show_files.keys())
             for ct, codename in zip(code_tabs, show_files.keys()):
@@ -276,7 +282,18 @@ def to_str_recursive(obj):
             system = d["obj"].get("system", None)
             user = d["obj"]["user"]
             resp = d["obj"]["resp"]
-            with st.expander(f"**LLM**", icon="🤖", expanded=False):
+            start_time = d["obj"].get("start", "")
+            end_time = d["obj"].get("end", "")
+            if start_time and end_time:
+                start_str = start_time.strftime("%m-%d %H:%M:%S")
+                end_str = end_time.strftime("%m-%d %H:%M:%S")
+                duration = end_time - start_time
+                time_info_str = (
+                    f"🕰️:blue[**{start_str} ~ {end_str}**] ⏳:violet[**{round(duration.total_seconds(), 2)}s**]"
+                )
+            else:
+                time_info_str = ""
+            with st.expander(f"**LLM** {time_info_str}", icon="🤖", expanded=False):
                 t1, t2, t3, t4 = st.tabs(
                     [":green[**Response**]", ":blue[**User**]", ":orange[**System**]", ":violet[**ChatBot**]"]
                 )
@@ -367,13 +384,13 @@ def exp_gen_win(exp_gen_data, llm_data=None):
     st.header("Exp Gen", divider="blue", anchor="exp-gen")
     if state.show_llm_log and llm_data is not None:
         llm_log_win(llm_data["no_tag"])
-    st.subheader("Hypothesis")
+    st.subheader("💡 Hypothesis")
     hypothesis_win(exp_gen_data["no_tag"].hypothesis)
 
-    st.subheader("pending_tasks")
+    st.subheader("📋 pending_tasks")
     for tasks in exp_gen_data["no_tag"].pending_tasks_list:
         task_win(tasks[0])
-    st.subheader("Exp Workspace")
+    st.subheader("📁 Exp Workspace")
     workspace_win(exp_gen_data["no_tag"].experiment_workspace)
 
 
@@ -573,21 +590,23 @@ def replace_ep_path(p: Path):
 def get_llm_call_stats(llm_data: dict) -> tuple[int, int]:
     total_llm_call = 0
     total_filter_call = 0
-    total_call_seconds = 0
-    filter_call_seconds = 0
+    total_call_duration = timedelta()
+    filter_call_duration = timedelta()
     filter_sys_prompt = T("rdagent.utils.prompts:filter_redundant_text.system").r()
     for li, loop_d in llm_data.items():
         for fn, loop_fn_d in loop_d.items():
             for k, v in loop_fn_d.items():
                 for d in v:
                     if "debug_llm" in d["tag"]:
                         total_llm_call += 1
-                        total_call_seconds += d["obj"].get("duration", 0)
+                        total_call_duration += d["obj"].get("end", timedelta()) - d["obj"].get("start", timedelta())
                         if "system" in d["obj"] and filter_sys_prompt == d["obj"]["system"]:
                             total_filter_call += 1
-                            filter_call_seconds += d["obj"].get("duration", 0)
+                            filter_call_duration += d["obj"].get("end", timedelta()) - d["obj"].get(
+                                "start", timedelta()
+                            )
 
-    return total_llm_call, total_filter_call, total_call_seconds, filter_call_seconds
+    return total_llm_call, total_filter_call, total_call_duration, filter_call_duration
 
 
 def get_timeout_stats(llm_data: dict):
@@ -638,13 +657,13 @@ def summarize_win():
         with info3.popover("RDLOOP", icon="⚙️"):
             st.write(state.data.get("settings", {}).get("RDLOOP_SETTINGS", "No settings found."))
 
-        llm_call, llm_filter_call, llm_call_seconds, llm_filter_call_seconds = get_llm_call_stats(state.llm_data)
-        info4.metric("LLM Calls", llm_call, help=timedelta_to_str(timedelta(seconds=llm_call_seconds)))
+        llm_call, llm_filter_call, llm_call_duration, filter_call_duration = get_llm_call_stats(state.llm_data)
+        info4.metric("LLM Calls", llm_call, help=timedelta_to_str(llm_call_duration))
         info5.metric(
             "LLM Filter Calls",
             llm_filter_call,
             delta=-round(llm_filter_call / llm_call, 5),
-            help=timedelta_to_str(timedelta(seconds=llm_filter_call_seconds)),
+            help=timedelta_to_str(filter_call_duration),
         )
 
         timeout_stats = get_timeout_stats(state.llm_data)
@@ -718,21 +737,28 @@ def summarize_win():
             df.loc[loop, "COST($)"] = sum(tc.content["cost"] for tc in state.token_costs[loop])
 
             # Time Stats
-            if loop in state.times and state.times[loop]:
-                exp_gen_time = coding_time = running_time = None
-                all_steps_time = timedelta()
-                for lpt in state.times[loop]:
-                    all_steps_time += lpt.end - lpt.start
-                    if lpt.step_idx == 0:
-                        exp_gen_time = lpt.end - lpt.start
-                    elif lpt.step_idx == 1:
-                        coding_time = lpt.end - lpt.start
-                    elif lpt.step_idx == 2:
-                        running_time = lpt.end - lpt.start
-                df.loc[loop, "Time"] = timedelta_to_str(all_steps_time)
-                df.loc[loop, "Exp Gen"] = timedelta_to_str(exp_gen_time)
-                df.loc[loop, "Coding"] = timedelta_to_str(coding_time)
-                df.loc[loop, "Running"] = timedelta_to_str(running_time)
+            exp_gen_time = timedelta()
+            coding_time = timedelta()
+            running_time = timedelta()
+            all_steps_time = timedelta()
+            if loop in state.times:
+                for step_name, step_time in state.times[loop].items():
+                    step_duration = step_time["end_time"] - step_time["start_time"]
+                    if step_name == "exp_gen":
+                        exp_gen_time += step_duration
+                        all_steps_time += step_duration
+                    elif step_name == "coding":
+                        coding_time += step_duration
+                        all_steps_time += step_duration
+                    elif step_name == "running":
+                        running_time += step_duration
+                        all_steps_time += step_duration
+                    elif step_name in ["feedback", "record"]:
+                        all_steps_time += step_duration
+            df.loc[loop, "Time"] = timedelta_to_str(all_steps_time)
+            df.loc[loop, "Exp Gen"] = timedelta_to_str(exp_gen_time)
+            df.loc[loop, "Coding"] = timedelta_to_str(coding_time)
+            df.loc[loop, "Running"] = timedelta_to_str(running_time)
 
             if "running" in loop_data and "no_tag" in loop_data["running"]:
                 try:
@@ -835,22 +861,10 @@ def summarize_win():
             df = df[df["Feedback"] == "✅"]
         st.dataframe(df[df.columns[~df.columns.isin(["Hypothesis", "Reason", "Others"])]])
 
-        # COST curve
-        costs = df["COST($)"].astype(float)
-        costs.index = [f"L{i}" for i in costs.index]
-        cumulative_costs = costs.cumsum()
-        with st.popover("COST Curve", icon="💰", use_container_width=True):
-            fig = px.line(
-                x=costs.index,
-                y=[costs.values, cumulative_costs.values],
-                labels={"x": "Loop", "value": "COST($)"},
-                title="COST($) per Loop & Cumulative COST($)",
-                markers=True,
-            )
-            fig.update_traces(mode="lines+markers")
-            fig.data[0].name = "COST($) per Loop"
-            fig.data[1].name = "Cumulative COST($)"
-            st.plotly_chart(fig)
+        # timeline figure
+        if state.times:
+            with st.popover("Timeline", icon="⏱️", use_container_width=True):
+                st.plotly_chart(timeline_figure(state.times))
 
         # scores curve
         vscores = {}
@@ -920,8 +934,8 @@ def comp_stat_func(x: pd.DataFrame):
         comp_df["Valid Rate"] = comp_df["Valid Rate"].apply(lambda x: f"{x}%")
         comp_df["Success Rate"] = comp_df["Success Rate"].apply(lambda x: f"{x}%")
         comp_df.loc["Total", "Avg e-loops(c)"] = round(df["e-loops(c)"].mean(), 2)
-        st2.markdown("### Component Statistics")
-        st2.dataframe(comp_df)
+        with st2.popover("Component Statistics", icon="📊", use_container_width=True):
+            st.dataframe(comp_df)
 
         # component time statistics
         time_df = df.loc[:, ["Component", "Time", "Exp Gen", "Coding", "Running"]]
@@ -933,15 +947,32 @@ def comp_stat_func(x: pd.DataFrame):
                 "Running": "timedelta64[ns]",
             }
         )
-        st1.markdown("### Time Statistics")
         time_stat_df = time_df.groupby("Component").sum()
         time_stat_df.loc["Total"] = time_stat_df.sum()
         time_stat_df.loc[:, "Exp Gen(%)"] = (time_stat_df["Exp Gen"] / time_stat_df["Time"] * 100).round(2)
         time_stat_df.loc[:, "Coding(%)"] = (time_stat_df["Coding"] / time_stat_df["Time"] * 100).round(2)
         time_stat_df.loc[:, "Running(%)"] = (time_stat_df["Running"] / time_stat_df["Time"] * 100).round(2)
         for col in ["Time", "Exp Gen", "Coding", "Running"]:
             time_stat_df[col] = time_stat_df[col].map(timedelta_to_str)
-        st1.dataframe(time_stat_df)
+        with st1.popover("Time Statistics", icon="⏱️", use_container_width=True):
+            st.dataframe(time_stat_df)
+
+        # COST curve
+        costs = df["COST($)"].astype(float)
+        costs.index = [f"L{i}" for i in costs.index]
+        cumulative_costs = costs.cumsum()
+        with st.popover("COST Curve", icon="💰", use_container_width=True):
+            fig = px.line(
+                x=costs.index,
+                y=[costs.values, cumulative_costs.values],
+                labels={"x": "Loop", "value": "COST($)"},
+                title="COST($) per Loop & Cumulative COST($)",
+                markers=True,
+            )
+            fig.update_traces(mode="lines+markers")
+            fig.data[0].name = "COST($) per Loop"
+            fig.data[1].name = "Cumulative COST($)"
+            st.plotly_chart(fig)
 
 
 def stdout_win(loop_id: int):
@@ -1029,7 +1060,7 @@ def get_folders_sorted(log_path, sort_by_time=False):
                 st.toast("Please select a log path first!", icon="🟡")
                 st.stop()
 
-            state.times = load_times(state.log_folder / state.log_path)
+            state.times = load_times_info(state.log_folder / state.log_path)
             state.data, state.llm_data, state.token_costs = load_data(state.log_folder / state.log_path)
             state.sota_info = get_sota_exp_stat(Path(state.log_folder) / state.log_path, to_submit=True)
             st.rerun()