Commit 19ef60c

Added Red Teaming Evaluation, Evaluation Taxonomy, Scheduled RedTeaming/Dataset Evaluations (#43788)
* updated
* updated
1 parent 0d7977c commit 19ef60c

File tree

3 files changed: +600 -1 lines changed


sdk/ai/azure-ai-projects/samples/evaluation/sample_evaluations_builtin_with_dataset_id.py

Lines changed: 1 addition & 1 deletion
@@ -158,7 +158,7 @@
         type="jsonl",
         source=SourceFileID(
             type ="file_id",
-            id=dataset_id
+            id=dataset.id if dataset.id else ""
         )
     )
 )
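The new line only guards against `dataset.id` being unset, so that `SourceFileID` always receives a string; a minimal sketch of the idea, reusing the sample's variable names (the dataset upload step that precedes it is assumed, not shown in this hunk):

    # dataset comes from an earlier upload step in this sample (assumed here)
    source = SourceFileID(
        type="file_id",
        id=dataset.id if dataset.id else "",  # fall back to "" rather than passing None
    )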
Lines changed: 231 additions & 0 deletions
@@ -0,0 +1,231 @@
# pylint: disable=line-too-long,useless-suppression
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

"""
DESCRIPTION:
    Given an AIProjectClient, this sample demonstrates how to use the synchronous
    `openai.evals.*` methods, together with the project's evaluation taxonomies, to
    create an eval group and run a red teaming evaluation against an Agent.

USAGE:
    python sample_redteam_evaluations.py

    Before running the sample:

    pip install azure-ai-projects>=2.0.0b1 azure-identity python-dotenv

    Set these environment variables with your own values:
    1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your
       Azure AI Foundry project. It has the form: https://<account_name>.services.ai.azure.com/api/projects/<project_name>.
    2) DATA_FOLDER - Optional. The folder path where the generated taxonomy and evaluation output files are written.
    3) AGENT_NAME - Required. The name of the Agent to perform red teaming evaluation on.
"""
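
# For illustration, the environment can be supplied via a local .env file that load_dotenv()
# below will pick up; the values here are placeholders, not real resources:
#
#   AZURE_AI_PROJECT_ENDPOINT=https://<account_name>.services.ai.azure.com/api/projects/<project_name>
#   AGENT_NAME=my-agent
#   DATA_FOLDER=./data_folder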

import os
import json
import time

from dotenv import load_dotenv
from pprint import pprint
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
    AgentVersionObject,
    EvaluationTaxonomy,
    AzureAIAgentTarget,
    AgentTaxonomyInput,
    RiskCategory,
)


def main() -> None:
    load_dotenv()

    endpoint = os.environ.get("AZURE_AI_PROJECT_ENDPOINT", "")  # Sample: https://<account_name>.services.ai.azure.com/api/projects/<project_name>
    agent_name = os.environ.get("AGENT_NAME", "")

    # Construct the path to the data folder used in this sample
    script_dir = os.path.dirname(os.path.abspath(__file__))
    data_folder = os.environ.get("DATA_FOLDER", os.path.join(script_dir, "data_folder"))

    with DefaultAzureCredential() as credential:
        with AIProjectClient(endpoint=endpoint, credential=credential, api_version="2025-11-15-preview") as project_client:
            print("Creating an OpenAI client from the AI Project client")
            client = project_client.get_openai_client()

            # Resolve the latest version of the Agent that will be red teamed
            agent_versions = project_client.agents.retrieve(agent_name=agent_name)
            agent = agent_versions.versions.latest
            agent_version = agent.version
            print(f"Retrieved agent: {agent_name}, version: {agent_version}")

            eval_group_name = "Red Team Agent Safety Eval Group -" + str(int(time.time()))
            eval_run_name = f"Red Team Agent Safety Eval Run for {agent_name} -" + str(int(time.time()))
            data_source_config = {
                "type": "azure_ai_source",
                "scenario": "red_team"
            }

            testing_criteria = _get_agent_safety_evaluation_criteria()
            print("Defining testing criteria for red teaming the agent target")
            pprint(testing_criteria)

            print("Creating Eval Group")
            eval_object = client.evals.create(
                name=eval_group_name,
                data_source_config=data_source_config,
                testing_criteria=testing_criteria,
            )
            print(f"Eval Group created for red teaming: {eval_group_name}")

            print(f"Get Eval Group by Id: {eval_object.id}")
            eval_object_response = client.evals.retrieve(eval_object.id)
            print("Eval Group Response:")
            pprint(eval_object_response)

            # Build a red teaming taxonomy scoped to the chosen risk categories and the Agent target
            risk_categories_for_taxonomy = [RiskCategory.PROHIBITED_ACTIONS]
            target = AzureAIAgentTarget(name=agent_name, version=agent_version, tool_descriptions=_get_tool_descriptions(agent))
            agent_taxonomy_input = AgentTaxonomyInput(risk_categories=risk_categories_for_taxonomy, target=target)
            print("Creating Eval Taxonomies")
            eval_taxonomy_input = EvaluationTaxonomy(
                description="Taxonomy for red teaming evaluation",
                taxonomy_input=agent_taxonomy_input)

            taxonomy = project_client.evaluation_taxonomies.create(name=agent_name, body=eval_taxonomy_input)
            taxonomy_path = os.path.join(data_folder, f"taxonomy_{agent_name}.json")
            # Create the data folder if it doesn't exist
            os.makedirs(data_folder, exist_ok=True)
            with open(taxonomy_path, "w") as f:
                f.write(json.dumps(_to_json_primitive(taxonomy), indent=2))
            print(f"RedTeaming Taxonomy created for agent: {agent_name}. Taxonomy written to {taxonomy_path}")

            print("Creating RedTeaming Eval Run")
            # The run's data source references the taxonomy created above and configures attack strategies and turns
            eval_run_object = client.evals.runs.create(
                eval_id=eval_object.id,
                name=eval_run_name,
                data_source={
                    "type": "azure_ai_red_team",
                    "item_generation_params": {
                        "type": "red_team_taxonomy",
                        "attack_strategies": [
                            "Flip",
                            "Base64"
                        ],
                        "num_turns": 5,
                        "source": {
                            "type": "file_id",
                            "id": taxonomy.id
                        }
                    },
                    "target": target.as_dict()
                }
            )

            print(f"Eval Run created for red teaming: {eval_run_name}")
            pprint(eval_run_object)

            print(f"Get Eval Run by Id: {eval_run_object.id}")
            eval_run_response = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id)
            print("Eval Run Response:")
            pprint(eval_run_response)

            # Poll until the run finishes, then write the output items to the data folder
            while True:
                run = client.evals.runs.retrieve(run_id=eval_run_response.id, eval_id=eval_object.id)
                if run.status == "completed" or run.status == "failed":
                    output_items = list(client.evals.runs.output_items.list(
                        run_id=run.id, eval_id=eval_object.id
                    ))
                    output_items_path = os.path.join(data_folder, f"redteam_eval_output_items_{agent_name}.json")
                    # Create the data folder if it doesn't exist
                    os.makedirs(data_folder, exist_ok=True)
                    with open(output_items_path, "w") as f:
                        f.write(json.dumps(_to_json_primitive(output_items), indent=2))
                    print(f"RedTeam Eval Run completed with status: {run.status}. Output items written to {output_items_path}")
                    break
                time.sleep(5)
                print("Waiting for eval run to complete...")

# [END evaluations_sample]

def _get_tool_descriptions(agent: AgentVersionObject):
    tools = agent.definition.get("tools", [])
    tool_descriptions = []
    for tool in tools:
        if tool["type"] == "openapi":
            tool_descriptions.append({
                "name": tool["openapi"]["name"],
                "description": tool["openapi"]["description"] if "description" in tool["openapi"] else "No description provided"
            })
        else:
            tool_descriptions.append({
                "name": tool["name"] if "name" in tool else "Unnamed Tool",
                "description": tool["description"] if "description" in tool else "No description provided"
            })

    return tool_descriptions
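
# For illustration only (not from the sample): a hypothetical agent exposing a single
# OpenAPI tool would make _get_tool_descriptions return something like
#   [{"name": "weather_api", "description": "Looks up the weather"}]
# with the fallback values above filling in any missing name or description.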


def _get_agent_safety_evaluation_criteria():
    return [
        {
            "type": "azure_ai_evaluator",
            "name": "Prohibited Actions",
            "evaluator_name": "builtin.prohibited_actions",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Task Adherence",
            "evaluator_name": "builtin.task_adherence",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Sensitive Data Leakage",
            "evaluator_name": "builtin.sensitive_data_leakage",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Self Harm",
            "evaluator_name": "builtin.self_harm",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Violence",
            "evaluator_name": "builtin.violence",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Sexual",
            "evaluator_name": "builtin.sexual",
            "evaluator_version": "1"
        },
        {
            "type": "azure_ai_evaluator",
            "name": "Hate Unfairness",
            "evaluator_name": "builtin.hate_unfairness",
            "evaluator_version": "1"
        }
    ]


def _to_json_primitive(obj):
    if obj is None or isinstance(obj, (str, int, float, bool)):
        return obj
    if isinstance(obj, (list, tuple)):
        return [_to_json_primitive(i) for i in obj]
    if isinstance(obj, dict):
        return {k: _to_json_primitive(v) for k, v in obj.items()}
    for method in ("to_dict", "as_dict", "dict", "serialize"):
        if hasattr(obj, method):
            try:
                return _to_json_primitive(getattr(obj, method)())
            except Exception:
                pass
    if hasattr(obj, "__dict__"):
        return _to_json_primitive({k: v for k, v in vars(obj).items() if not k.startswith("_")})
    return str(obj)
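
# For illustration only (the class below is made up, not part of the SDK): an object that
# exposes as_dict() is converted recursively into JSON-safe primitives, e.g.
#   class Item:
#       def as_dict(self): return {"id": 1, "tags": ("a", "b")}
#   _to_json_primitive([Item()])  ->  [{"id": 1, "tags": ["a", "b"]}]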


if __name__ == "__main__":
    main()
