Commit 3308694

nagkumar91 (Nagkumar Arkalgud) authored
Output structure for red team (Azure#40382)
* Update task_query_response.prompty: remove required keys
* Update task_simulate.prompty
* Update task_query_response.prompty
* Update task_simulate.prompty
* Fix the api_key needed
* Update for release
* Black fix for file
* Add original text in global context
* Update test
* Update the indirect attack simulator
* Black suggested fixes
* Update simulator prompty
* Update adversarial scenario enum to exclude XPIA
* Update changelog
* Black fixes
* Remove duplicate import
* Fix the mypy error
* Mypy please be happy
* Updates to non adv simulator
* Accept context from assistant messages, exclude them when using them for conversation
* Update changelog
* Pylint fixes
* Pylint fixes
* Remove redundant quotes
* Fix typo
* Pylint fix
* Update broken tests
* Include the grounding json in the manifest
* Fix typo
* Come on package
* Release 1.0.0b5
* Notice from Chang
* Remove adv_conv template parameters from the outputs
* Update changelog
* Experimental tags on adv scenarios
* Readme fix on breaking change
* Add the category and both user and assistant context to the response of qr_json_lines
* Update changelog
* Rename _kwargs to _options
* _options as prefix
* Update troubleshooting for simulator
* Rename according to suggestions
* Clean up readme
* More links
* Bugfix: zip_longest created null parameters
* Update changelog
* zip does the job
* Remove unused import
* Fix changelog merge
* Remove print statements
* Red team output is now result
* Fix result structure
* Changes for Ankit
* Update test and init for red team

---------

Co-authored-by: Nagkumar Arkalgud <[email protected]>
Co-authored-by: Nagkumar Arkalgud <[email protected]>
Co-authored-by: Nagkumar Arkalgud <[email protected]>
1 parent 8b5fbd8 commit 3308694

File tree

7 files changed: +258 -115 lines changed


sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_converters/_ai_services.py

Lines changed: 4 additions & 4 deletions
@@ -667,7 +667,7 @@ def prepare_evaluation_data(self, thread_ids=Union[str, List[str]], filename: st
         return evaluations
 
     @staticmethod
-    def run_ids_from_conversation(conversation: dict) -> List[str]:
+    def _run_ids_from_conversation(conversation: dict) -> List[str]:
         """
         Extracts a list of unique run IDs from a conversation dictionary.
 
@@ -684,7 +684,7 @@ def run_ids_from_conversation(conversation: dict) -> List[str]:
         return run_ids
 
     @staticmethod
-    def convert_from_conversation(
+    def _convert_from_conversation(
         conversation: dict, run_id: str, exclude_tool_calls_previous_runs: bool = False
     ) -> dict:
         """
@@ -765,7 +765,7 @@ def convert_from_conversation(
         return json.loads(final_result.to_json())
 
     @staticmethod
-    def convert_from_file(filename: str, run_id: str) -> dict:
+    def _convert_from_file(filename: str, run_id: str) -> dict:
         """
         Converts the agent run from a JSON file to a format suitable for the OpenAI API, the JSON file being a thread.
 
@@ -801,4 +801,4 @@ def convert_from_file(filename: str, run_id: str) -> dict:
         with open(filename, mode="r", encoding="utf-8") as file:
             data = json.load(file)
 
-        return AIAgentConverter.convert_from_conversation(data, run_id)
+        return AIAgentConverter._convert_from_conversation(data, run_id)
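This commit makes the converter helpers private (underscore-prefixed), marking them as internal SDK surface. The short Python sketch below shows what the rename means for any code that was calling the old public names; the thread.json file name and the run ID value are hypothetical placeholders, not values from this diff.

from azure.ai.evaluation._converters._ai_services import AIAgentConverter

# Old public call (before this commit):
#   data = AIAgentConverter.convert_from_file("thread.json", run_id="run_abc123")

# After this commit the helper is underscore-prefixed; the call shape is unchanged,
# but the leading underscore signals it is internal and may change without notice.
data = AIAgentConverter._convert_from_file("thread.json", run_id="run_abc123")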

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/__init__.py

Lines changed: 10 additions & 6 deletions
@@ -2,14 +2,18 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
-from ._red_team import RedTeam
-from ._attack_strategy import AttackStrategy
-from ._attack_objective_generator import RiskCategory
-from ._red_team_result import RedTeamOutput
+try:
+    from ._red_team import RedTeam
+    from ._attack_strategy import AttackStrategy
+    from ._attack_objective_generator import RiskCategory
+    from ._red_team_result import RedTeamResult
+except ImportError:
+    print("[INFO] Could not import Pyrit. Please install the dependency with `pip install azure-ai-evaluation[redteam]`.")
+
 
 __all__ = [
     "RedTeam",
     "AttackStrategy",
     "RiskCategory",
-    "RedTeamOutput",
-]
+    "RedTeamResult",
+]
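The package now guards its imports and points users at the optional extra when Pyrit is missing, and the public result type exported here is RedTeamResult rather than RedTeamOutput. A small consumer-side sketch under those assumptions follows; the SystemExit handling is illustrative, and the pip command is the one printed by the ImportError handler above.

# pip install azure-ai-evaluation[redteam]   # installs Pyrit and the other red team dependencies

try:
    from azure.ai.evaluation.red_team import (
        RedTeam,
        AttackStrategy,
        RiskCategory,
        RedTeamResult,  # renamed from RedTeamOutput in this release
    )
except ImportError as exc:
    # If Pyrit is missing, the names above are never defined, so this import fails here.
    raise SystemExit(
        "Red team support is not installed; run `pip install azure-ai-evaluation[redteam]`."
    ) from exc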

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_red_team.py

Lines changed: 46 additions & 40 deletions
@@ -43,7 +43,7 @@
 from azure.core.credentials import TokenCredential
 
 # Red Teaming imports
-from ._red_team_result import _RedTeamResult, _RedTeamingScorecard, _RedTeamingParameters, RedTeamOutput
+from ._red_team_result import RedTeamResult, RedTeamingScorecard, RedTeamingParameters, ScanResult
 from ._attack_strategy import AttackStrategy
 from ._attack_objective_generator import RiskCategory, _AttackObjectiveGenerator
 
@@ -204,7 +204,7 @@ def _start_redteam_mlflow_run(
 
     async def _log_redteam_results_to_mlflow(
         self,
-        redteam_output: RedTeamOutput,
+        redteam_output: RedTeamResult,
         eval_run: EvalRun,
         data_only: bool = False,
     ) -> Optional[str]:
@@ -230,9 +230,9 @@ async def _log_redteam_results_to_mlflow(
         with open(artifact_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
             if data_only:
                 # In data_only mode, we write the conversations in conversation/messages format
-                f.write(json.dumps({"conversations": redteam_output.redteaming_data or []}))
-            elif redteam_output.red_team_result:
-                json.dump(redteam_output.red_team_result, f)
+                f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
+            elif redteam_output.scan_result:
+                json.dump(redteam_output.scan_result, f)
 
         eval_info_name = "redteam_info.json"
         eval_info_path = os.path.join(self.scan_output_dir, eval_info_name)
@@ -248,10 +248,10 @@ async def _log_redteam_results_to_mlflow(
             f.write(json.dumps(red_team_info_logged))
 
         # Also save a human-readable scorecard if available
-        if not data_only and redteam_output.red_team_result:
+        if not data_only and redteam_output.scan_result:
             scorecard_path = os.path.join(self.scan_output_dir, "scorecard.txt")
             with open(scorecard_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-                f.write(self._to_scorecard(redteam_output.red_team_result))
+                f.write(self._to_scorecard(redteam_output.scan_result))
             self.logger.debug(f"Saved scorecard to: {scorecard_path}")
 
         # Create a dedicated artifacts directory with proper structure for MLFlow
@@ -262,9 +262,9 @@ async def _log_redteam_results_to_mlflow(
             # First, create the main artifact file that MLFlow expects
             with open(os.path.join(tmpdir, artifact_name), "w", encoding=DefaultOpenEncoding.WRITE) as f:
                 if data_only:
-                    f.write(json.dumps({"conversations": redteam_output.redteaming_data or []}))
-                elif redteam_output.red_team_result:
-                    json.dump(redteam_output.red_team_result, f)
+                    f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
+                elif redteam_output.scan_result:
+                    json.dump(redteam_output.scan_result, f)
 
             # Copy all relevant files to the temp directory
             import shutil
@@ -303,9 +303,9 @@ async def _log_redteam_results_to_mlflow(
             artifact_file = Path(tmpdir) / artifact_name
             with open(artifact_file, "w", encoding=DefaultOpenEncoding.WRITE) as f:
                 if data_only:
-                    f.write(json.dumps({"conversations": redteam_output.redteaming_data or []}))
-                elif redteam_output.red_team_result:
-                    json.dump(redteam_output.red_team_result, f)
+                    f.write(json.dumps({"conversations": redteam_output.attack_details or []}))
+                elif redteam_output.scan_result:
+                    json.dump(redteam_output.scan_result, f)
             eval_run.log_artifact(tmpdir, artifact_name)
             self.logger.debug(f"Logged artifact: {artifact_name}")
 
@@ -316,8 +316,8 @@ async def _log_redteam_results_to_mlflow(
             "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
         })
 
-        if redteam_output.red_team_result:
-            scorecard = redteam_output.red_team_result["redteaming_scorecard"]
+        if redteam_output.scan_result:
+            scorecard = redteam_output.scan_result["scorecard"]
             joint_attack_summary = scorecard["joint_risk_attack_summary"]
 
             if joint_attack_summary:
@@ -793,13 +793,13 @@ def _get_attack_success(self, result: str) -> bool:
         from ._utils.formatting_utils import get_attack_success
         return get_attack_success(result)
 
-    def _to_red_team_result(self) -> _RedTeamResult:
-        """Convert tracking data from red_team_info to the _RedTeamResult format.
+    def _to_red_team_result(self) -> RedTeamResult:
+        """Convert tracking data from red_team_info to the RedTeamResult format.
 
-        Uses only the red_team_info tracking dictionary to build the _RedTeamResult.
+        Uses only the red_team_info tracking dictionary to build the RedTeamResult.
 
         :return: Structured red team agent results
-        :rtype: _RedTeamResult
+        :rtype: RedTeamResult
         """
         converters = []
         complexity_levels = []
@@ -812,7 +812,7 @@ def _to_red_team_result(self) -> RedTeamResult:
         summary_file = os.path.join(self.scan_output_dir, "attack_summary.csv")
         self.logger.debug(f"Creating attack summary CSV file: {summary_file}")
 
-        self.logger.info(f"Building _RedTeamResult from red_team_info with {len(self.red_team_info)} strategies")
+        self.logger.info(f"Building RedTeamResult from red_team_info with {len(self.red_team_info)} strategies")
 
         # Process each strategy and risk category from red_team_info
         for strategy_name, risk_data in self.red_team_info.items():
@@ -1155,20 +1155,20 @@ def _to_red_team_result(self) -> RedTeamResult:
             complexity_converters = complexity_df["converter"].unique().tolist()
             redteaming_parameters["techniques_used"][complexity] = complexity_converters
 
-        self.logger.info("_RedTeamResult creation completed")
+        self.logger.info("RedTeamResult creation completed")
 
         # Create the final result
-        red_team_result = _RedTeamResult(
-            redteaming_scorecard=cast(_RedTeamingScorecard, scorecard),
-            redteaming_parameters=cast(_RedTeamingParameters, redteaming_parameters),
-            redteaming_data=conversations,
+        red_team_result = ScanResult(
+            scorecard=cast(RedTeamingScorecard, scorecard),
+            parameters=cast(RedTeamingParameters, redteaming_parameters),
+            attack_details=conversations,
             studio_url=self.ai_studio_url or None
         )
 
         return red_team_result
 
     # Replace with utility function
-    def _to_scorecard(self, redteam_result: _RedTeamResult) -> str:
+    def _to_scorecard(self, redteam_result: RedTeamResult) -> str:
         from ._utils.formatting_utils import format_scorecard
         return format_scorecard(redteam_result)
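The hunks above and below amount to a consistent rename of the result surface. For anyone migrating code that consumed the old names, the mapping visible in this file is summarized below; this dict is purely illustrative and is not part of the SDK.

# Old name -> new name, as seen in this diff (illustrative only, not an SDK API).
RED_TEAM_RENAMES = {
    "RedTeamOutput": "RedTeamResult",            # return type of RedTeam.scan()
    "_RedTeamResult": "ScanResult",              # internal result container
    "_RedTeamingScorecard": "RedTeamingScorecard",
    "_RedTeamingParameters": "RedTeamingParameters",
    "red_team_result": "scan_result",            # attribute holding the result dict
    "redteaming_data": "attack_details",         # attribute holding raw conversations
    "redteaming_scorecard": "scorecard",         # key inside the result dict
    "redteaming_parameters": "parameters",       # key inside the result dict
}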

@@ -1445,7 +1445,7 @@ async def scan(
         parallel_execution: bool = True,
         max_parallel_tasks: int = 5,
         timeout: int = 120
-    ) -> RedTeamOutput:
+    ) -> RedTeamResult:
         """Run a red team scan against the target using the specified strategies.
 
         :param target: The target model or function to scan
@@ -1812,17 +1812,23 @@ def filter(self, record):
         # Process results
         log_section_header(self.logger, "Processing results")
 
-        # Convert results to _RedTeamResult using only red_team_info
+        # Convert results to RedTeamResult using only red_team_info
         red_team_result = self._to_red_team_result()
+        scan_result = ScanResult(
+            scorecard=red_team_result["scorecard"],
+            parameters=red_team_result["parameters"],
+            attack_details=red_team_result["attack_details"],
+            studio_url=red_team_result["studio_url"],
+        )
 
         # Create output with either full results or just conversations
         if data_only:
             self.logger.info("Data-only mode, creating output with just conversations")
-            output = RedTeamOutput(redteaming_data=red_team_result["redteaming_data"])
+            output = RedTeamResult(scan_result=scan_result, attack_details=red_team_result["attack_details"])
         else:
-            output = RedTeamOutput(
-                red_team_result=red_team_result,
-                redteaming_data=red_team_result["redteaming_data"]
+            output = RedTeamResult(
+                scan_result=red_team_result,
+                attack_details=red_team_result["attack_details"]
             )
 
         # Log results to MLFlow
@@ -1837,34 +1843,34 @@ def filter(self, record):
             self.logger.info("Data-only mode, returning results without evaluation")
             return output
 
-        if output_path and output.red_team_result:
+        if output_path and output.scan_result:
             # Ensure output_path is an absolute path
             abs_output_path = output_path if os.path.isabs(output_path) else os.path.abspath(output_path)
             self.logger.info(f"Writing output to {abs_output_path}")
-            _write_output(abs_output_path, output.red_team_result)
+            _write_output(abs_output_path, output.scan_result)
 
             # Also save a copy to the scan output directory if available
             if hasattr(self, 'scan_output_dir') and self.scan_output_dir:
                 final_output = os.path.join(self.scan_output_dir, "final_results.json")
-                _write_output(final_output, output.red_team_result)
+                _write_output(final_output, output.scan_result)
                 self.logger.info(f"Also saved a copy to {final_output}")
-        elif output.red_team_result and hasattr(self, 'scan_output_dir') and self.scan_output_dir:
+        elif output.scan_result and hasattr(self, 'scan_output_dir') and self.scan_output_dir:
             # If no output_path was specified but we have scan_output_dir, save there
             final_output = os.path.join(self.scan_output_dir, "final_results.json")
-            _write_output(final_output, output.red_team_result)
+            _write_output(final_output, output.scan_result)
             self.logger.info(f"Saved results to {final_output}")
 
-        if output.red_team_result:
+        if output.scan_result:
             self.logger.debug("Generating scorecard")
-            scorecard = self._to_scorecard(output.red_team_result)
+            scorecard = self._to_scorecard(output.scan_result)
             # Store scorecard in a variable for accessing later if needed
             self.scorecard = scorecard
 
             # Print scorecard to console for user visibility (without extra header)
             print(scorecard)
 
             # Print URL for detailed results (once only)
-            studio_url = output.red_team_result.get("studio_url", "")
+            studio_url = output.scan_result.get("studio_url", "")
             if studio_url:
                 print(f"\nDetailed results available at:\n{studio_url}")
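Taken together, scan() now returns a RedTeamResult whose scan_result dict exposes the scorecard, parameters, attack_details, and studio_url keys, while the attack_details attribute carries the raw attack conversations. The sketch below shows one way a caller might consume that structure; the RedTeam constructor arguments, the toy target callback, and the risk_categories and attack_strategies parameter names are assumptions for illustration, not taken from this diff.

import asyncio
from azure.ai.evaluation.red_team import RedTeam, AttackStrategy, RiskCategory

async def main() -> None:
    # Constructor arguments are illustrative placeholders, not values from this commit.
    red_team = RedTeam(
        azure_ai_project={
            "subscription_id": "<sub-id>",
            "resource_group_name": "<rg>",
            "project_name": "<project>",
        },
        credential=None,  # e.g. DefaultAzureCredential() in real use
        risk_categories=[RiskCategory.Violence],  # assumed parameter name
    )

    # scan() is async and, as of this commit, returns RedTeamResult (was RedTeamOutput).
    result = await red_team.scan(
        target=lambda query: "I can't help with that.",  # toy target callback
        attack_strategies=[AttackStrategy.Base64],        # assumed parameter name
        output_path="red_team_results.json",              # scan() also writes here itself
    )

    # New field names introduced by this commit:
    if result.scan_result:
        print(result.scan_result["scorecard"])   # was redteaming_scorecard
        print(result.scan_result["studio_url"])  # link to detailed results, if any
    conversations = result.attack_details or []  # was redteaming_data
    print(f"{len(conversations)} attack conversations captured")

asyncio.run(main())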
