@@ -670,7 +670,7 @@ async def _prompt_sending_orchestrator(
670670
671671 except asyncio .TimeoutError :
672672 self .logger .warning (f"Batch { batch_idx + 1 } for { strategy_name } /{ risk_category } timed out after { timeout } seconds, continuing with partial results" )
673- self .logger .debug (f"❌ Timeout: Strategy { strategy_name } , Risk { risk_category } , Batch { batch_idx + 1 } after { timeout } seconds." , exc_info = True )
673+ self .logger .debug (f"Timeout: Strategy { strategy_name } , Risk { risk_category } , Batch { batch_idx + 1 } after { timeout } seconds." , exc_info = True )
674674 print (f"⚠️ TIMEOUT: Strategy { strategy_name } , Risk { risk_category } , Batch { batch_idx + 1 } " )
675675 # Set task status to TIMEOUT
676676 batch_task_key = f"{ strategy_name } _{ risk_category } _batch_{ batch_idx + 1 } "
@@ -923,7 +923,11 @@ def _to_red_team_result(self) -> _RedTeamResult:
923923 risk_category_summary = {}
924924
925925 # Overall metrics across all categories
926- overall_asr = round (list_mean_nan_safe (results_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in results_df .columns else 0.0
926+ try :
927+ overall_asr = round (list_mean_nan_safe (results_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in results_df .columns else 0.0
928+ except EvaluationException :
929+ self .logger .debug ("All values in overall attack success array were None or NaN, setting ASR to NaN" )
930+ overall_asr = math .nan
927931 overall_total = len (results_df )
928932 overall_successful_attacks = sum ([s for s in results_df ["attack_success" ].tolist () if not is_none_or_nan (s )]) if "attack_success" in results_df .columns else 0
929933
@@ -935,7 +939,11 @@ def _to_red_team_result(self) -> _RedTeamResult:
935939
936940 # Per-risk category metrics
937941 for risk , group in risk_category_groups :
938- asr = round (list_mean_nan_safe (group ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in group .columns else 0.0
942+ try :
943+ asr = round (list_mean_nan_safe (group ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in group .columns else 0.0
944+ except EvaluationException :
945+ self .logger .debug (f"All values in attack success array for { risk } were None or NaN, setting ASR to NaN" )
946+ asr = math .nan
939947 total = len (group )
940948 successful_attacks = sum ([s for s in group ["attack_success" ].tolist () if not is_none_or_nan (s )]) if "attack_success" in group .columns else 0
941949
@@ -958,35 +966,55 @@ def _to_red_team_result(self) -> _RedTeamResult:
958966 # Baseline metrics
959967 baseline_df = results_df [baseline_mask ]
960968 if not baseline_df .empty :
969+ try :
970+ baseline_asr = round (list_mean_nan_safe (baseline_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in baseline_df .columns else 0.0
971+ except EvaluationException :
972+ self .logger .debug ("All values in baseline attack success array were None or NaN, setting ASR to NaN" )
973+ baseline_asr = math .nan
961974 attack_technique_summary_dict .update ({
962- "baseline_asr" : round ( list_mean_nan_safe ( baseline_df [ "attack_success" ]. tolist ()) * 100 , 2 ) if "attack_success" in baseline_df . columns else 0.0 ,
975+ "baseline_asr" : baseline_asr ,
963976 "baseline_total" : len (baseline_df ),
964977 "baseline_attack_successes" : sum ([s for s in baseline_df ["attack_success" ].tolist () if not is_none_or_nan (s )]) if "attack_success" in baseline_df .columns else 0
965978 })
966979
967980 # Easy complexity metrics
968981 easy_df = results_df [easy_mask ]
969982 if not easy_df .empty :
983+ try :
984+ easy_complexity_asr = round (list_mean_nan_safe (easy_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in easy_df .columns else 0.0
985+ except EvaluationException :
986+ self .logger .debug ("All values in easy complexity attack success array were None or NaN, setting ASR to NaN" )
987+ easy_complexity_asr = math .nan
970988 attack_technique_summary_dict .update ({
971- "easy_complexity_asr" : round ( list_mean_nan_safe ( easy_df [ "attack_success" ]. tolist ()) * 100 , 2 ) if "attack_success" in easy_df . columns else 0.0 ,
989+ "easy_complexity_asr" : easy_complexity_asr ,
972990 "easy_complexity_total" : len (easy_df ),
973991 "easy_complexity_attack_successes" : sum ([s for s in easy_df ["attack_success" ].tolist () if not is_none_or_nan (s )]) if "attack_success" in easy_df .columns else 0
974992 })
975993
976994 # Moderate complexity metrics
977995 moderate_df = results_df [moderate_mask ]
978996 if not moderate_df .empty :
997+ try :
998+ moderate_complexity_asr = round (list_mean_nan_safe (moderate_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in moderate_df .columns else 0.0
999+ except EvaluationException :
1000+ self .logger .debug ("All values in moderate complexity attack success array were None or NaN, setting ASR to NaN" )
1001+ moderate_complexity_asr = math .nan
9791002 attack_technique_summary_dict .update ({
980- "moderate_complexity_asr" : round ( list_mean_nan_safe ( moderate_df [ "attack_success" ]. tolist ()) * 100 , 2 ) if "attack_success" in moderate_df . columns else 0.0 ,
1003+ "moderate_complexity_asr" : moderate_complexity_asr ,
9811004 "moderate_complexity_total" : len (moderate_df ),
9821005 "moderate_complexity_attack_successes" : sum ([s for s in moderate_df ["attack_success" ].tolist () if not is_none_or_nan (s )]) if "attack_success" in moderate_df .columns else 0
9831006 })
9841007
9851008 # Difficult complexity metrics
9861009 difficult_df = results_df [difficult_mask ]
9871010 if not difficult_df .empty :
1011+ try :
1012+ difficult_complexity_asr = round (list_mean_nan_safe (difficult_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in difficult_df .columns else 0.0
1013+ except EvaluationException :
1014+ self .logger .debug ("All values in difficult complexity attack success array were None or NaN, setting ASR to NaN" )
1015+ difficult_complexity_asr = math .nan
9881016 attack_technique_summary_dict .update ({
989- "difficult_complexity_asr" : round ( list_mean_nan_safe ( difficult_df [ "attack_success" ]. tolist ()) * 100 , 2 ) if "attack_success" in difficult_df . columns else 0.0 ,
1017+ "difficult_complexity_asr" : difficult_complexity_asr ,
9901018 "difficult_complexity_total" : len (difficult_df ),
9911019 "difficult_complexity_attack_successes" : sum ([s for s in difficult_df ["attack_success" ].tolist () if not is_none_or_nan (s )]) if "attack_success" in difficult_df .columns else 0
9921020 })
@@ -1013,22 +1041,38 @@ def _to_red_team_result(self) -> _RedTeamResult:
10131041 # Baseline ASR for this risk
10141042 baseline_risk_df = results_df [risk_mask & baseline_mask ]
10151043 if not baseline_risk_df .empty :
1016- joint_risk_dict ["baseline_asr" ] = round (list_mean_nan_safe (baseline_risk_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in baseline_risk_df .columns else 0.0
1044+ try :
1045+ joint_risk_dict ["baseline_asr" ] = round (list_mean_nan_safe (baseline_risk_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in baseline_risk_df .columns else 0.0
1046+ except EvaluationException :
1047+ self .logger .debug (f"All values in baseline attack success array for { risk_key } were None or NaN, setting ASR to NaN" )
1048+ joint_risk_dict ["baseline_asr" ] = math .nan
10171049
10181050 # Easy complexity ASR for this risk
10191051 easy_risk_df = results_df [risk_mask & easy_mask ]
10201052 if not easy_risk_df .empty :
1021- joint_risk_dict ["easy_complexity_asr" ] = round (list_mean_nan_safe (easy_risk_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in easy_risk_df .columns else 0.0
1053+ try :
1054+ joint_risk_dict ["easy_complexity_asr" ] = round (list_mean_nan_safe (easy_risk_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in easy_risk_df .columns else 0.0
1055+ except EvaluationException :
1056+ self .logger .debug (f"All values in easy complexity attack success array for { risk_key } were None or NaN, setting ASR to NaN" )
1057+ joint_risk_dict ["easy_complexity_asr" ] = math .nan
10221058
10231059 # Moderate complexity ASR for this risk
10241060 moderate_risk_df = results_df [risk_mask & moderate_mask ]
10251061 if not moderate_risk_df .empty :
1026- joint_risk_dict ["moderate_complexity_asr" ] = round (list_mean_nan_safe (moderate_risk_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in moderate_risk_df .columns else 0.0
1062+ try :
1063+ joint_risk_dict ["moderate_complexity_asr" ] = round (list_mean_nan_safe (moderate_risk_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in moderate_risk_df .columns else 0.0
1064+ except EvaluationException :
1065+ self .logger .debug (f"All values in moderate complexity attack success array for { risk_key } were None or NaN, setting ASR to NaN" )
1066+ joint_risk_dict ["moderate_complexity_asr" ] = math .nan
10271067
10281068 # Difficult complexity ASR for this risk
10291069 difficult_risk_df = results_df [risk_mask & difficult_mask ]
10301070 if not difficult_risk_df .empty :
1031- joint_risk_dict ["difficult_complexity_asr" ] = round (list_mean_nan_safe (difficult_risk_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in difficult_risk_df .columns else 0.0
1071+ try :
1072+ joint_risk_dict ["difficult_complexity_asr" ] = round (list_mean_nan_safe (difficult_risk_df ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in difficult_risk_df .columns else 0.0
1073+ except EvaluationException :
1074+ self .logger .debug (f"All values in difficult complexity attack success array for { risk_key } were None or NaN, setting ASR to NaN" )
1075+ joint_risk_dict ["difficult_complexity_asr" ] = math .nan
10321076
10331077 joint_risk_attack_summary .append (joint_risk_dict )
10341078
@@ -1055,7 +1099,11 @@ def _to_red_team_result(self) -> _RedTeamResult:
10551099
10561100 converter_groups = complexity_risk_df .groupby ("converter" )
10571101 for converter_name , converter_group in converter_groups :
1058- asr_value = round (list_mean_nan_safe (converter_group ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in converter_group .columns else 0.0
1102+ try :
1103+ asr_value = round (list_mean_nan_safe (converter_group ["attack_success" ].tolist ()) * 100 , 2 ) if "attack_success" in converter_group .columns else 0.0
1104+ except EvaluationException :
1105+ self .logger .debug (f"All values in attack success array for { converter_name } in { complexity } /{ risk_key } were None or NaN, setting ASR to NaN" )
1106+ asr_value = math .nan
10591107 detailed_joint_risk_attack_asr [complexity ][risk_key ][f"{ converter_name } _ASR" ] = asr_value
10601108
10611109 # Compile the scorecard
0 commit comments