19
19
from rdagent .log .ui .utils import (
20
20
curve_figure ,
21
21
get_sota_exp_stat ,
22
- load_times ,
22
+ load_times_info ,
23
+ timeline_figure ,
23
24
trace_figure ,
24
25
)
25
26
from rdagent .log .utils import (
@@ -170,9 +171,11 @@ def load_stdout(stdout_path: Path):
170
171
171
172
# UI windows
172
173
def task_win (task ):
173
- with st .container (border = True ):
174
- st .markdown (f"**:violet[{ task .name } ]**" )
174
+ with st .expander (f"**:violet[{ task .name } ]**" , expanded = False ):
175
175
st .markdown (task .description )
176
+ if hasattr (task , "package_info" ):
177
+ st .markdown (f"**:blue[Package Info:]**" )
178
+ st .code (task .package_info )
176
179
if hasattr (task , "architecture" ): # model task
177
180
st .markdown (
178
181
f"""
@@ -185,14 +188,17 @@ def task_win(task):
185
188
186
189
def workspace_win (workspace , cmp_workspace = None , cmp_name = "last code." ):
187
190
show_files = {k : v for k , v in workspace .file_dict .items () if "test" not in k }
188
-
189
191
if len (show_files ) > 0 :
190
192
if cmp_workspace :
191
193
diff = generate_diff_from_dict (cmp_workspace .file_dict , show_files , "main.py" )
192
194
with st .popover (f":violet[**Diff with { cmp_name } **]" , use_container_width = True , icon = "🔍" ):
193
195
st .code ("" .join (diff ), language = "diff" , wrap_lines = True , line_numbers = True )
196
+
197
+ rtime = workspace .running_info .running_time
198
+ time_str = timedelta_to_str (timedelta (seconds = rtime ) if rtime else None ) or "00:00:00"
199
+
194
200
with st .popover (
195
- f"Files in :blue[{ replace_ep_path (workspace .workspace_path )} ]" , use_container_width = True , icon = "📂"
201
+ f"⏱️ { time_str } 📂 Files in :blue[{ replace_ep_path (workspace .workspace_path )} ]" , use_container_width = True
196
202
):
197
203
code_tabs = st .tabs (show_files .keys ())
198
204
for ct , codename in zip (code_tabs , show_files .keys ()):
@@ -276,7 +282,18 @@ def to_str_recursive(obj):
276
282
system = d ["obj" ].get ("system" , None )
277
283
user = d ["obj" ]["user" ]
278
284
resp = d ["obj" ]["resp" ]
279
- with st .expander (f"**LLM**" , icon = "🤖" , expanded = False ):
285
+ start_time = d ["obj" ].get ("start" , "" )
286
+ end_time = d ["obj" ].get ("end" , "" )
287
+ if start_time and end_time :
288
+ start_str = start_time .strftime ("%m-%d %H:%M:%S" )
289
+ end_str = end_time .strftime ("%m-%d %H:%M:%S" )
290
+ duration = end_time - start_time
291
+ time_info_str = (
292
+ f"🕰️:blue[**{ start_str } ~ { end_str } **] ⏳:violet[**{ round (duration .total_seconds (), 2 )} s**]"
293
+ )
294
+ else :
295
+ time_info_str = ""
296
+ with st .expander (f"**LLM** { time_info_str } " , icon = "🤖" , expanded = False ):
280
297
t1 , t2 , t3 , t4 = st .tabs (
281
298
[":green[**Response**]" , ":blue[**User**]" , ":orange[**System**]" , ":violet[**ChatBot**]" ]
282
299
)
@@ -367,13 +384,13 @@ def exp_gen_win(exp_gen_data, llm_data=None):
367
384
st .header ("Exp Gen" , divider = "blue" , anchor = "exp-gen" )
368
385
if state .show_llm_log and llm_data is not None :
369
386
llm_log_win (llm_data ["no_tag" ])
370
- st .subheader ("Hypothesis" )
387
+ st .subheader ("💡 Hypothesis" )
371
388
hypothesis_win (exp_gen_data ["no_tag" ].hypothesis )
372
389
373
- st .subheader ("pending_tasks" )
390
+ st .subheader ("📋 pending_tasks" )
374
391
for tasks in exp_gen_data ["no_tag" ].pending_tasks_list :
375
392
task_win (tasks [0 ])
376
- st .subheader ("Exp Workspace" )
393
+ st .subheader ("📁 Exp Workspace" )
377
394
workspace_win (exp_gen_data ["no_tag" ].experiment_workspace )
378
395
379
396
@@ -573,21 +590,23 @@ def replace_ep_path(p: Path):
573
590
def get_llm_call_stats(llm_data: dict) -> tuple[int, int, timedelta, timedelta]:
    """Count logged LLM calls and sum the time spent in them.

    Walks the nested ``llm_data`` mapping (three dict levels, then a list of
    records) and inspects every record whose ``"tag"`` contains
    ``"debug_llm"``.  A record is additionally counted as a *filter* call
    when its ``"system"`` prompt equals the rendered
    ``filter_redundant_text.system`` template.

    Args:
        llm_data: Nested mapping whose innermost values are lists of records;
            each record is a dict with at least ``"tag"`` and ``"obj"`` keys.

    Returns:
        A 4-tuple ``(total_llm_call, total_filter_call, total_call_duration,
        filter_call_duration)``; the two durations are ``timedelta`` objects.
    """
    total_llm_call = 0
    total_filter_call = 0
    total_call_duration = timedelta()
    filter_call_duration = timedelta()
    filter_sys_prompt = T("rdagent.utils.prompts:filter_redundant_text.system").r()

    for loop_d in llm_data.values():
        for loop_fn_d in loop_d.values():
            for records in loop_fn_d.values():
                for d in records:
                    if "debug_llm" not in d["tag"]:
                        continue
                    obj = d["obj"]

                    # Only subtract when BOTH timestamps are present.  Falling
                    # back to timedelta() for just one side would mix datetime
                    # and timedelta, which either raises TypeError or silently
                    # turns the running total into a datetime.
                    start = obj.get("start")
                    end = obj.get("end")
                    duration = end - start if start and end else timedelta()

                    total_llm_call += 1
                    total_call_duration += duration
                    if "system" in obj and filter_sys_prompt == obj["system"]:
                        total_filter_call += 1
                        filter_call_duration += duration

    return total_llm_call, total_filter_call, total_call_duration, filter_call_duration
591
610
592
611
593
612
def get_timeout_stats (llm_data : dict ):
@@ -638,13 +657,13 @@ def summarize_win():
638
657
with info3 .popover ("RDLOOP" , icon = "⚙️" ):
639
658
st .write (state .data .get ("settings" , {}).get ("RDLOOP_SETTINGS" , "No settings found." ))
640
659
641
- llm_call , llm_filter_call , llm_call_seconds , llm_filter_call_seconds = get_llm_call_stats (state .llm_data )
642
- info4 .metric ("LLM Calls" , llm_call , help = timedelta_to_str (timedelta ( seconds = llm_call_seconds ) ))
660
+ llm_call , llm_filter_call , llm_call_duration , filter_call_duration = get_llm_call_stats (state .llm_data )
661
+ info4 .metric ("LLM Calls" , llm_call , help = timedelta_to_str (llm_call_duration ))
643
662
info5 .metric (
644
663
"LLM Filter Calls" ,
645
664
llm_filter_call ,
646
665
delta = - round (llm_filter_call / llm_call , 5 ),
647
- help = timedelta_to_str (timedelta ( seconds = llm_filter_call_seconds ) ),
666
+ help = timedelta_to_str (filter_call_duration ),
648
667
)
649
668
650
669
timeout_stats = get_timeout_stats (state .llm_data )
@@ -718,21 +737,28 @@ def summarize_win():
718
737
df .loc [loop , "COST($)" ] = sum (tc .content ["cost" ] for tc in state .token_costs [loop ])
719
738
720
739
# Time Stats
721
- if loop in state .times and state .times [loop ]:
722
- exp_gen_time = coding_time = running_time = None
723
- all_steps_time = timedelta ()
724
- for lpt in state .times [loop ]:
725
- all_steps_time += lpt .end - lpt .start
726
- if lpt .step_idx == 0 :
727
- exp_gen_time = lpt .end - lpt .start
728
- elif lpt .step_idx == 1 :
729
- coding_time = lpt .end - lpt .start
730
- elif lpt .step_idx == 2 :
731
- running_time = lpt .end - lpt .start
732
- df .loc [loop , "Time" ] = timedelta_to_str (all_steps_time )
733
- df .loc [loop , "Exp Gen" ] = timedelta_to_str (exp_gen_time )
734
- df .loc [loop , "Coding" ] = timedelta_to_str (coding_time )
735
- df .loc [loop , "Running" ] = timedelta_to_str (running_time )
740
+ exp_gen_time = timedelta ()
741
+ coding_time = timedelta ()
742
+ running_time = timedelta ()
743
+ all_steps_time = timedelta ()
744
+ if loop in state .times :
745
+ for step_name , step_time in state .times [loop ].items ():
746
+ step_duration = step_time ["end_time" ] - step_time ["start_time" ]
747
+ if step_name == "exp_gen" :
748
+ exp_gen_time += step_duration
749
+ all_steps_time += step_duration
750
+ elif step_name == "coding" :
751
+ coding_time += step_duration
752
+ all_steps_time += step_duration
753
+ elif step_name == "running" :
754
+ running_time += step_duration
755
+ all_steps_time += step_duration
756
+ elif step_name in ["feedback" , "record" ]:
757
+ all_steps_time += step_duration
758
+ df .loc [loop , "Time" ] = timedelta_to_str (all_steps_time )
759
+ df .loc [loop , "Exp Gen" ] = timedelta_to_str (exp_gen_time )
760
+ df .loc [loop , "Coding" ] = timedelta_to_str (coding_time )
761
+ df .loc [loop , "Running" ] = timedelta_to_str (running_time )
736
762
737
763
if "running" in loop_data and "no_tag" in loop_data ["running" ]:
738
764
try :
@@ -835,22 +861,10 @@ def summarize_win():
835
861
df = df [df ["Feedback" ] == "✅" ]
836
862
st .dataframe (df [df .columns [~ df .columns .isin (["Hypothesis" , "Reason" , "Others" ])]])
837
863
838
- # COST curve
839
- costs = df ["COST($)" ].astype (float )
840
- costs .index = [f"L{ i } " for i in costs .index ]
841
- cumulative_costs = costs .cumsum ()
842
- with st .popover ("COST Curve" , icon = "💰" , use_container_width = True ):
843
- fig = px .line (
844
- x = costs .index ,
845
- y = [costs .values , cumulative_costs .values ],
846
- labels = {"x" : "Loop" , "value" : "COST($)" },
847
- title = "COST($) per Loop & Cumulative COST($)" ,
848
- markers = True ,
849
- )
850
- fig .update_traces (mode = "lines+markers" )
851
- fig .data [0 ].name = "COST($) per Loop"
852
- fig .data [1 ].name = "Cumulative COST($)"
853
- st .plotly_chart (fig )
864
+ # timeline figure
865
+ if state .times :
866
+ with st .popover ("Timeline" , icon = "⏱️" , use_container_width = True ):
867
+ st .plotly_chart (timeline_figure (state .times ))
854
868
855
869
# scores curve
856
870
vscores = {}
@@ -920,8 +934,8 @@ def comp_stat_func(x: pd.DataFrame):
920
934
comp_df ["Valid Rate" ] = comp_df ["Valid Rate" ].apply (lambda x : f"{ x } %" )
921
935
comp_df ["Success Rate" ] = comp_df ["Success Rate" ].apply (lambda x : f"{ x } %" )
922
936
comp_df .loc ["Total" , "Avg e-loops(c)" ] = round (df ["e-loops(c)" ].mean (), 2 )
923
- st2 .markdown ( "### Component Statistics")
924
- st2 .dataframe (comp_df )
937
+ with st2 .popover ( " Component Statistics", icon = "📊" , use_container_width = True ):
938
+ st .dataframe (comp_df )
925
939
926
940
# component time statistics
927
941
time_df = df .loc [:, ["Component" , "Time" , "Exp Gen" , "Coding" , "Running" ]]
@@ -933,15 +947,32 @@ def comp_stat_func(x: pd.DataFrame):
933
947
"Running" : "timedelta64[ns]" ,
934
948
}
935
949
)
936
- st1 .markdown ("### Time Statistics" )
937
950
time_stat_df = time_df .groupby ("Component" ).sum ()
938
951
time_stat_df .loc ["Total" ] = time_stat_df .sum ()
939
952
time_stat_df .loc [:, "Exp Gen(%)" ] = (time_stat_df ["Exp Gen" ] / time_stat_df ["Time" ] * 100 ).round (2 )
940
953
time_stat_df .loc [:, "Coding(%)" ] = (time_stat_df ["Coding" ] / time_stat_df ["Time" ] * 100 ).round (2 )
941
954
time_stat_df .loc [:, "Running(%)" ] = (time_stat_df ["Running" ] / time_stat_df ["Time" ] * 100 ).round (2 )
942
955
for col in ["Time" , "Exp Gen" , "Coding" , "Running" ]:
943
956
time_stat_df [col ] = time_stat_df [col ].map (timedelta_to_str )
944
- st1 .dataframe (time_stat_df )
957
+ with st1 .popover ("Time Statistics" , icon = "⏱️" , use_container_width = True ):
958
+ st .dataframe (time_stat_df )
959
+
960
+ # COST curve
961
+ costs = df ["COST($)" ].astype (float )
962
+ costs .index = [f"L{ i } " for i in costs .index ]
963
+ cumulative_costs = costs .cumsum ()
964
+ with st .popover ("COST Curve" , icon = "💰" , use_container_width = True ):
965
+ fig = px .line (
966
+ x = costs .index ,
967
+ y = [costs .values , cumulative_costs .values ],
968
+ labels = {"x" : "Loop" , "value" : "COST($)" },
969
+ title = "COST($) per Loop & Cumulative COST($)" ,
970
+ markers = True ,
971
+ )
972
+ fig .update_traces (mode = "lines+markers" )
973
+ fig .data [0 ].name = "COST($) per Loop"
974
+ fig .data [1 ].name = "Cumulative COST($)"
975
+ st .plotly_chart (fig )
945
976
946
977
947
978
def stdout_win (loop_id : int ):
@@ -1029,7 +1060,7 @@ def get_folders_sorted(log_path, sort_by_time=False):
1029
1060
st .toast ("Please select a log path first!" , icon = "🟡" )
1030
1061
st .stop ()
1031
1062
1032
- state .times = load_times (state .log_folder / state .log_path )
1063
+ state .times = load_times_info (state .log_folder / state .log_path )
1033
1064
state .data , state .llm_data , state .token_costs = load_data (state .log_folder / state .log_path )
1034
1065
state .sota_info = get_sota_exp_stat (Path (state .log_folder ) / state .log_path , to_submit = True )
1035
1066
st .rerun ()
0 commit comments