# pylint: disable=line-too-long,useless-suppression
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

"""
DESCRIPTION:
    Given an AIProjectClient, this sample demonstrates how to use the synchronous
    `openai.evals.*` methods to create, get, and list eval groups and eval runs.

USAGE:
    python sample_redteam_evaluations.py

    Before running the sample:

    pip install azure-identity azure-ai-projects>=2.0.0b1 python-dotenv

    Set these environment variables with your own values:
    1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your
       Azure AI Foundry project. It has the form: https://<account_name>.services.ai.azure.com/api/projects/<project_name>.
    2) DATA_FOLDER - Optional. The folder path where the data files for upload are located.
    3) AGENT_NAME - Required. The name of the Agent to perform red teaming evaluation on.
"""
import json
import os
import time

from dotenv import load_dotenv
from pprint import pprint
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
    AgentVersionObject,
    EvaluationTaxonomy,
    AzureAIAgentTarget,
    AgentTaxonomyInput,
    RiskCategory,
)

def main() -> None:
    load_dotenv()

    endpoint = os.environ.get("AZURE_AI_PROJECT_ENDPOINT", "")  # Sample: https://<account_name>.services.ai.azure.com/api/projects/<project_name>
    agent_name = os.environ.get("AGENT_NAME", "")

    # Construct the paths to the data folder and data file used in this sample
    script_dir = os.path.dirname(os.path.abspath(__file__))
    data_folder = os.environ.get("DATA_FOLDER", os.path.join(script_dir, "data_folder"))

    with DefaultAzureCredential() as credential:
        with AIProjectClient(endpoint=endpoint, credential=credential, api_version="2025-11-15-preview") as project_client:
            print("Creating an OpenAI client from the AI Project client")
            client = project_client.get_openai_client()

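            # Look up the agent by name and pin its latest version; the version
            # number is needed when constructing the red team target below.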
            agent_versions = project_client.agents.retrieve(agent_name=agent_name)
            agent = agent_versions.versions.latest
            agent_version = agent.version
            print(f"Retrieved agent: {agent_name}, version: {agent_version}")
            eval_group_name = "Red Team Agent Safety Eval Group -" + str(int(time.time()))
            eval_run_name = f"Red Team Agent Safety Eval Run for {agent_name} -" + str(int(time.time()))
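
            # The data source config marks this eval group for the red team
            # scenario; the actual attack data is generated by the eval run
            # created further below.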
            data_source_config = {
                "type": "azure_ai_source",
                "scenario": "red_team"
            }

            testing_criteria = _get_agent_safety_evaluation_criteria()
            print("Defining testing criteria for red teaming of the agent target")
            pprint(testing_criteria)

            print("Creating Eval Group")
            eval_object = client.evals.create(
                name=eval_group_name,
                data_source_config=data_source_config,
                testing_criteria=testing_criteria,
            )
            print(f"Eval Group created for red teaming: {eval_group_name}")

            print(f"Get Eval Group by Id: {eval_object.id}")
            eval_object_response = client.evals.retrieve(eval_object.id)
            print("Eval Group Response:")
            pprint(eval_object_response)

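            # Build a red teaming taxonomy for this agent: the selected risk
            # categories plus the target definition (agent name, version, and
            # tool descriptions) drive which attack objectives are generated.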
            risk_categories_for_taxonomy = [RiskCategory.PROHIBITED_ACTIONS]
            target = AzureAIAgentTarget(name=agent_name, version=agent_version, tool_descriptions=_get_tool_descriptions(agent))
            agent_taxonomy_input = AgentTaxonomyInput(risk_categories=risk_categories_for_taxonomy, target=target)
            print("Creating Eval Taxonomies")
            eval_taxonomy_input = EvaluationTaxonomy(
                description="Taxonomy for red teaming evaluation",
                taxonomy_input=agent_taxonomy_input,
            )

            taxonomy = project_client.evaluation_taxonomies.create(name=agent_name, body=eval_taxonomy_input)
            taxonomy_path = os.path.join(data_folder, f"taxonomy_{agent_name}.json")
            # Create the data folder if it doesn't exist
            os.makedirs(data_folder, exist_ok=True)
            with open(taxonomy_path, "w") as f:
                f.write(json.dumps(_to_json_primitive(taxonomy), indent=2))
            print(f"RedTeaming Taxonomy created for agent: {agent_name}. Taxonomy written to {taxonomy_path}")

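            # The run's data source references the taxonomy created above by id
            # and configures item generation: which attack strategies to apply
            # ("Flip" and "Base64") and how many conversation turns each attack
            # gets against the target agent.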
            print("Creating RedTeaming Eval Run")
            eval_run_object = client.evals.runs.create(
                eval_id=eval_object.id,
                name=eval_run_name,
                data_source={
                    "type": "azure_ai_red_team",
                    "item_generation_params": {
                        "type": "red_team_taxonomy",
                        "attack_strategies": [
                            "Flip",
                            "Base64"
                        ],
                        "num_turns": 5,
                        "source": {
                            "type": "file_id",
                            "id": taxonomy.id
                        }
                    },
                    "target": target.as_dict()
                }
            )

            print(f"Eval Run created for red teaming: {eval_run_name}")
            pprint(eval_run_object)

            print(f"Get Eval Run by Id: {eval_run_object.id}")
            eval_run_response = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id)
            print("Eval Run Response:")
            pprint(eval_run_response)

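            # Poll every few seconds until the run reaches a terminal state,
            # then write the run's output items to the data folder.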
            while True:
                run = client.evals.runs.retrieve(run_id=eval_run_response.id, eval_id=eval_object.id)
                if run.status in ("completed", "failed"):
                    output_items = list(client.evals.runs.output_items.list(
                        run_id=run.id, eval_id=eval_object.id
                    ))
                    output_items_path = os.path.join(data_folder, f"redteam_eval_output_items_{agent_name}.json")
                    # Create the data folder if it doesn't exist
                    os.makedirs(data_folder, exist_ok=True)
                    with open(output_items_path, "w") as f:
                        f.write(json.dumps(_to_json_primitive(output_items), indent=2))
                    print(f"RedTeam Eval Run completed with status: {run.status}. Output items written to {output_items_path}")
                    break
                print("Waiting for eval run to complete...")
                time.sleep(5)

            # [END evaluations_sample]

def _get_tool_descriptions(agent: AgentVersionObject):
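    """Collect {name, description} pairs for every tool attached to the agent.

    OpenAPI tools keep their metadata under the "openapi" key, so they are
    unpacked separately from other tool types.
    """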
    tools = agent.definition.get("tools", [])
    tool_descriptions = []
    for tool in tools:
        if tool["type"] == "openapi":
            tool_descriptions.append({
                "name": tool["openapi"]["name"],
                "description": tool["openapi"].get("description", "No description provided")
            })
        else:
            tool_descriptions.append({
                "name": tool.get("name", "Unnamed Tool"),
                "description": tool.get("description", "No description provided")
            })

    return tool_descriptions

def _get_agent_safety_evaluation_criteria():
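    """Return the built-in Azure AI safety evaluators used as testing criteria.

    Each entry names a built-in evaluator (e.g. builtin.violence) and pins it
    to version "1".
    """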
    return [
        {
            "type": "azure_ai_evaluator",
            "name": "Prohibited Actions",
            "evaluator_name": "builtin.prohibited_actions",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Task Adherence",
            "evaluator_name": "builtin.task_adherence",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Sensitive Data Leakage",
            "evaluator_name": "builtin.sensitive_data_leakage",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Self Harm",
            "evaluator_name": "builtin.self_harm",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Violence",
            "evaluator_name": "builtin.violence",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Sexual",
            "evaluator_name": "builtin.sexual",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Hate Unfairness",
            "evaluator_name": "builtin.hate_unfairness",
            "evaluator_version": "1"
        }
    ]

def _to_json_primitive(obj):
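    """Best-effort conversion of SDK model objects into JSON-serializable types.

    Recurses through lists and dicts, tries common serialization methods
    (to_dict, as_dict, dict, serialize), then falls back to the object's
    public attributes, and finally to str().
    """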
    if obj is None or isinstance(obj, (str, int, float, bool)):
        return obj
    if isinstance(obj, (list, tuple)):
        return [_to_json_primitive(i) for i in obj]
    if isinstance(obj, dict):
        return {k: _to_json_primitive(v) for k, v in obj.items()}
    for method in ("to_dict", "as_dict", "dict", "serialize"):
        if hasattr(obj, method):
            try:
                return _to_json_primitive(getattr(obj, method)())
            except Exception:
                pass
    if hasattr(obj, "__dict__"):
        return _to_json_primitive({k: v for k, v in vars(obj).items() if not k.startswith("_")})
    return str(obj)

if __name__ == "__main__":
    main()