@@ -189,9 +189,11 @@ def workspace_win(workspace, cmp_workspace=None, cmp_name="last code."):
189
189
if len (show_files ) > 0 :
190
190
if cmp_workspace :
191
191
diff = generate_diff_from_dict (cmp_workspace .file_dict , show_files , "main.py" )
192
- with st .expander (f":violet[**Diff with { cmp_name } **]" ):
192
+ with st .popover (f":violet[**Diff with { cmp_name } **]" , use_container_width = True , icon = "🔍 " ):
193
193
st .code ("" .join (diff ), language = "diff" , wrap_lines = True , line_numbers = True )
194
- with st .expander (f"Files in :blue[{ replace_ep_path (workspace .workspace_path )} ]" ):
194
+ with st .popover (
195
+ f"Files in :blue[{ replace_ep_path (workspace .workspace_path )} ]" , use_container_width = True , icon = "📂"
196
+ ):
195
197
code_tabs = st .tabs (show_files .keys ())
196
198
for ct , codename in zip (code_tabs , show_files .keys ()):
197
199
with ct :
@@ -562,16 +564,21 @@ def replace_ep_path(p: Path):
562
564
def get_llm_call_stats(llm_data: dict) -> tuple[int, int, float, float]:
    """Count logged LLM calls and sum their durations.

    Walks ``llm_data`` as three levels of nested dicts whose innermost values
    are lists of log entries. Each entry is a dict with a ``"tag"`` and an
    ``"obj"`` dict. Only entries whose tag contains ``"debug_llm"`` are counted.
    An entry is additionally counted as a "filter" call when its ``"system"``
    prompt equals the rendered ``filter_redundant_text.system`` template.

    Args:
        llm_data: Nested mapping of logged LLM messages, as described above.

    Returns:
        A 4-tuple ``(total_llm_call, total_filter_call, total_call_seconds,
        filter_call_seconds)``. The two duration totals are sums of each
        entry's ``"duration"`` field (treated as 0 when absent); callers
        interpret them as seconds.
    """
    total_llm_call = 0
    total_filter_call = 0
    total_call_seconds = 0
    filter_call_seconds = 0
    filter_sys_prompt = T("rdagent.utils.prompts:filter_redundant_text.system").r()

    # The keys at every nesting level are irrelevant to the statistics, so
    # iterate over values only.
    for loop_d in llm_data.values():
        for loop_fn_d in loop_d.values():
            for entries in loop_fn_d.values():
                for d in entries:
                    if "debug_llm" not in d["tag"]:
                        continue
                    obj = d["obj"]
                    # Entries without a recorded duration contribute 0.
                    duration = obj.get("duration", 0)
                    total_llm_call += 1
                    total_call_seconds += duration
                    # Filter calls are a subset of all LLM calls, identified
                    # by their system prompt.
                    if "system" in obj and filter_sys_prompt == obj["system"]:
                        total_filter_call += 1
                        filter_call_seconds += duration

    return total_llm_call, total_filter_call, total_call_seconds, filter_call_seconds
575
582
576
583
577
584
def get_timeout_stats (llm_data : dict ):
@@ -622,18 +629,23 @@ def summarize_win():
622
629
with info3 .popover ("RDLOOP" , icon = "⚙️" ):
623
630
st .write (state .data .get ("settings" , {}).get ("RDLOOP_SETTINGS" , "No settings found." ))
624
631
625
- llm_call , llm_filter_call = get_llm_call_stats (state .llm_data )
626
- info4 .metric ("LLM Calls" , llm_call )
627
- info5 .metric ("LLM Filter Calls" , f"{ llm_filter_call } ({ round (llm_filter_call / llm_call * 100 , 2 )} %)" )
632
+ llm_call , llm_filter_call , llm_call_seconds , llm_filter_call_seconds = get_llm_call_stats (state .llm_data )
633
+ info4 .metric ("LLM Calls" , llm_call , help = timedelta_to_str (timedelta (seconds = llm_call_seconds )))
634
+ info5 .metric (
635
+ "LLM Filter Calls" ,
636
+ llm_filter_call ,
637
+ delta = - round (llm_filter_call / llm_call , 5 ),
638
+ help = timedelta_to_str (timedelta (seconds = llm_filter_call_seconds )),
639
+ )
628
640
629
641
timeout_stats = get_timeout_stats (state .llm_data )
630
642
info6 .metric (
631
- "Timeouts (Coding )" ,
643
+ "Timeouts (C )" ,
632
644
f"{ round (timeout_stats ['coding' ]['timeout' ] / timeout_stats ['coding' ]['total' ] * 100 , 2 )} %" ,
633
645
help = f"{ timeout_stats ['coding' ]['timeout' ]} /{ timeout_stats ['coding' ]['total' ]} " ,
634
646
)
635
647
info7 .metric (
636
- "Timeouts (Running )" ,
648
+ "Timeouts (R )" ,
637
649
f"{ round (timeout_stats ['running' ]['timeout' ] / timeout_stats ['running' ]['total' ] * 100 , 2 )} %" ,
638
650
help = f"{ timeout_stats ['running' ]['timeout' ]} /{ timeout_stats ['running' ]['total' ]} " ,
639
651
)
@@ -661,8 +673,8 @@ def summarize_win():
661
673
"Running Score (valid)" ,
662
674
"Running Score (test)" ,
663
675
"Feedback" ,
664
- "e-loops(coding )" ,
665
- "e-loops(running )" ,
676
+ "e-loops(c )" ,
677
+ "e-loops(r )" ,
666
678
"COST($)" ,
667
679
"Time" ,
668
680
"Exp Gen" ,
@@ -784,18 +796,14 @@ def summarize_win():
784
796
785
797
if "coding" in loop_data :
786
798
if len ([i for i in loop_data ["coding" ].keys () if isinstance (i , int )]) == 0 :
787
- df .loc [loop , "e-loops(coding )" ] = 0
799
+ df .loc [loop , "e-loops(c )" ] = 0
788
800
else :
789
- df .loc [loop , "e-loops(coding)" ] = (
790
- max (i for i in loop_data ["coding" ].keys () if isinstance (i , int )) + 1
791
- )
801
+ df .loc [loop , "e-loops(c)" ] = max (i for i in loop_data ["coding" ].keys () if isinstance (i , int )) + 1
792
802
if "running" in loop_data :
793
803
if len ([i for i in loop_data ["running" ].keys () if isinstance (i , int )]) == 0 :
794
- df .loc [loop , "e-loops(running )" ] = 0
804
+ df .loc [loop , "e-loops(r )" ] = 0
795
805
else :
796
- df .loc [loop , "e-loops(running)" ] = (
797
- max (i for i in loop_data ["running" ].keys () if isinstance (i , int )) + 1
798
- )
806
+ df .loc [loop , "e-loops(r)" ] = max (i for i in loop_data ["running" ].keys () if isinstance (i , int )) + 1
799
807
if "feedback" in loop_data :
800
808
fb_emoji_str = "✅" if bool (loop_data ["feedback" ]["no_tag" ]) else "❌"
801
809
if sota_loop_id == loop :
@@ -863,23 +871,23 @@ def comp_stat_func(x: pd.DataFrame):
863
871
total_num = x .shape [0 ]
864
872
valid_num = x [x ["Running Score (test)" ] != "N/A" ].shape [0 ]
865
873
success_num = x [x ["Feedback" ] == "✅" ].shape [0 ]
866
- avg_e_loops = x ["e-loops(coding )" ].mean ()
874
+ avg_e_loops = x ["e-loops(c )" ].mean ()
867
875
return pd .Series (
868
876
{
869
877
"Loop Num" : total_num ,
870
878
"Valid Loop" : valid_num ,
871
879
"Success Loop" : success_num ,
872
880
"Valid Rate" : round (valid_num / total_num * 100 , 2 ),
873
881
"Success Rate" : round (success_num / total_num * 100 , 2 ),
874
- "Avg e-loops(coding )" : round (avg_e_loops , 2 ),
882
+ "Avg e-loops(c )" : round (avg_e_loops , 2 ),
875
883
}
876
884
)
877
885
878
886
st1 , st2 = st .columns ([1 , 1 ])
879
887
880
888
# component statistics
881
889
comp_df = (
882
- df .loc [:, ["Component" , "Running Score (test)" , "Feedback" , "e-loops(coding )" ]]
890
+ df .loc [:, ["Component" , "Running Score (test)" , "Feedback" , "e-loops(c )" ]]
883
891
.groupby ("Component" )
884
892
.apply (comp_stat_func , include_groups = False )
885
893
)
@@ -892,7 +900,7 @@ def comp_stat_func(x: pd.DataFrame):
892
900
)
893
901
comp_df ["Valid Rate" ] = comp_df ["Valid Rate" ].apply (lambda x : f"{ x } %" )
894
902
comp_df ["Success Rate" ] = comp_df ["Success Rate" ].apply (lambda x : f"{ x } %" )
895
- comp_df .loc ["Total" , "Avg e-loops(coding )" ] = round (df ["e-loops(coding )" ].mean (), 2 )
903
+ comp_df .loc ["Total" , "Avg e-loops(c )" ] = round (df ["e-loops(c )" ].mean (), 2 )
896
904
st2 .markdown ("### Component Statistics" )
897
905
st2 .dataframe (comp_df )
898
906
0 commit comments