@@ -456,6 +456,18 @@ async def _get_custom_attack_objectives(
456456 # Extract content from selected objectives
457457 selected_prompts = []
458458 for obj in selected_cat_objectives :
459+ risk_subtype = None
460+ # Extract risk-subtype from target_harms if present
461+ target_harms = obj .get ("metadata" , {}).get ("target_harms" , [])
462+ if target_harms and isinstance (target_harms , list ):
463+ for harm in target_harms :
464+ if isinstance (harm , dict ) and "risk-subtype" in harm :
465+ subtype_value = harm .get ("risk-subtype" )
466+ # Only store non-empty risk-subtype values
467+ if subtype_value and subtype_value .strip ():
468+ risk_subtype = subtype_value
469+ break # Use the first non-empty risk-subtype found
470+
459471 if "messages" in obj and len (obj ["messages" ]) > 0 :
460472 message = obj ["messages" ][0 ]
461473 if isinstance (message , dict ) and "content" in message :
@@ -464,6 +476,9 @@ async def _get_custom_attack_objectives(
464476 selected_prompts .append (content )
465477 # Store mapping of content to context for later evaluation
466478 self .prompt_to_context [content ] = context
479+ # Store risk_subtype mapping if it exists
480+ if risk_subtype :
481+ self .prompt_to_risk_subtype [content ] = risk_subtype
467482
468483 # Store in cache and return
469484 self ._cache_attack_objectives (current_key , risk_cat_value , strategy , selected_prompts , selected_cat_objectives )
0 commit comments