Commit 3871804

Aprilk/agent target insights samples (#43792)

* Added samples
* Ran black

1 parent 19ef60c commit 3871804

33 files changed (+2190 −2511 lines)
sdk/ai/azure-ai-projects/azure/ai/projects/models/_models.py

Lines changed: 3 additions & 1 deletion
@@ -5671,7 +5671,9 @@ class FileSearchTool(Tool, discriminator="file_search"):
         visibility=["read", "create", "update", "delete", "query"]
     )
     """Ranking options for search."""
-    filters: Optional[Union["_models.ComparisonFilter", "_models.CompoundFilter"]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    filters: Optional[Union["_models.ComparisonFilter", "_models.CompoundFilter"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """A filter to apply. Is either a ComparisonFilter type or a CompoundFilter type."""
 
     @overload
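
The reflowed `filters` field is the metadata filter a file-search tool applies to its results, taking either a ComparisonFilter or a CompoundFilter. A minimal sketch of supplying one (hedged: the `vector_store_ids` argument and the `key`/`type`/`value` fields are assumptions based on the OpenAI file-search filter schema this model mirrors; check the generated classes in `_models.py` for the exact constructors):

# Sketch only - the argument names below are assumptions, verify against the generated models.
from azure.ai.projects.models import ComparisonFilter, FileSearchTool

file_search = FileSearchTool(
    vector_store_ids=["<vector-store-id>"],  # placeholder id; assumed parameter name
    filters=ComparisonFilter(key="region", type="eq", value="emea"),  # assumed field names
)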
sample_agent_evaluation.py (new file)

Lines changed: 127 additions & 0 deletions

@@ -0,0 +1,127 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""
DESCRIPTION:
    This sample demonstrates how to create and run an evaluation for an Azure AI agent
    using the synchronous AIProjectClient.

    The OpenAI compatible Evals calls in this sample are made using
    the OpenAI client from the `openai` package. See https://platform.openai.com/docs/api-reference
    for more information.

USAGE:
    python sample_agent_evaluation.py

    Before running the sample:

    pip install "azure-ai-projects>=2.0.0b1" azure-identity python-dotenv

    Set these environment variables with your own values:
    1) AZURE_AI_PROJECT_ENDPOINT - The Azure AI Project endpoint, as found in the Overview
       page of your Azure AI Foundry portal.
    2) AZURE_AI_MODEL_DEPLOYMENT_NAME - The deployment name of the AI model, as found under the "Name" column in
       the "Models + endpoints" tab in your Azure AI Foundry project.
    3) AZURE_AI_AGENT_NAME - The name to use for the agent created by this sample.
"""

import os
import time
from pprint import pprint
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import PromptAgentDefinition
from openai.types.eval_create_params import DataSourceConfigCustom

load_dotenv()

project_client = AIProjectClient(
    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
)

with project_client:

    openai_client = project_client.get_openai_client()

    agent = project_client.agents.create_version(
        agent_name=os.environ["AZURE_AI_AGENT_NAME"],
        definition=PromptAgentDefinition(
            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
            instructions="You are a helpful assistant that answers general questions",
        ),
    )
    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")

    data_source_config = DataSourceConfigCustom(
        type="custom",
        item_schema={"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]},
        include_sample_schema=True,
    )
    testing_criteria = [
        {
            "type": "azure_ai_evaluator",
            "name": "violence_detection",
            "evaluator_name": "builtin.violence",
            "data_mapping": {"query": "{{item.query}}", "response": "{{item.response}}"},
        }
    ]
    eval_object = openai_client.evals.create(
        name="Agent Evaluation",
        data_source_config=data_source_config,
        testing_criteria=testing_criteria,
    )
    print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})")

    data_source = {
        "type": "azure_ai_target_completions",
        "source": {
            "type": "file_content",
            "content": [
                {"item": {"query": "What is the capital of France?"}},
                {"item": {"query": "How do I reverse a string in Python?"}},
            ],
        },
        "input_messages": {
            "type": "template",
            "template": [
                {"type": "message", "role": "user", "content": {"type": "input_text", "text": "{{item.query}}"}}
            ],
        },
        "target": {
            "type": "azure_ai_agent",
            "name": agent.name,
            "version": agent.version,  # Version is optional. Defaults to latest version if not specified
        },
    }

    agent_eval_run = openai_client.evals.runs.create(
        eval_id=eval_object.id, name=f"Evaluation Run for Agent {agent.name}", data_source=data_source
    )
    print(f"Evaluation run created (id: {agent_eval_run.id})")

    while agent_eval_run.status not in ["completed", "failed"]:
        agent_eval_run = openai_client.evals.runs.retrieve(run_id=agent_eval_run.id, eval_id=eval_object.id)
        print(f"Waiting for eval run to complete... current status: {agent_eval_run.status}")
        time.sleep(5)

    if agent_eval_run.status == "completed":
        print("\n✓ Evaluation run completed successfully!")
        print(f"Result Counts: {agent_eval_run.result_counts}")

        output_items = list(
            openai_client.evals.runs.output_items.list(run_id=agent_eval_run.id, eval_id=eval_object.id)
        )
        print(f"\nOUTPUT ITEMS (Total: {len(output_items)})")
        print(f"{'-'*60}")
        pprint(output_items)
        print(f"{'-'*60}")
    else:
        print("\n✗ Evaluation run failed.")

    openai_client.evals.delete(eval_id=eval_object.id)
    print("Evaluation deleted")

    project_client.agents.delete(agent_name=agent.name)
    print("Agent deleted")
sample_agent_response_evaluation.py (new file)

Lines changed: 116 additions & 0 deletions

@@ -0,0 +1,116 @@
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""
DESCRIPTION:
    This sample demonstrates how to create and run an evaluation for an Azure AI agent response
    using the synchronous AIProjectClient.

    The OpenAI compatible Evals calls in this sample are made using
    the OpenAI client from the `openai` package. See https://platform.openai.com/docs/api-reference
    for more information.

USAGE:
    python sample_agent_response_evaluation.py

    Before running the sample:

    pip install "azure-ai-projects>=2.0.0b1" azure-identity python-dotenv

    Set these environment variables with your own values:
    1) AZURE_AI_PROJECT_ENDPOINT - The Azure AI Project endpoint, as found in the Overview
       page of your Azure AI Foundry portal.
    2) AZURE_AI_MODEL_DEPLOYMENT_NAME - The deployment name of the AI model, as found under the "Name" column in
       the "Models + endpoints" tab in your Azure AI Foundry project.
    3) AZURE_AI_AGENT_NAME - The name to use for the agent created by this sample.
"""

import os
import time
from pprint import pprint
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import PromptAgentDefinition

load_dotenv()

project_client = AIProjectClient(
    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
)

with project_client:

    openai_client = project_client.get_openai_client()

    agent = project_client.agents.create_version(
        agent_name=os.environ["AZURE_AI_AGENT_NAME"],
        definition=PromptAgentDefinition(
            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
            instructions="You are a helpful assistant that answers general questions",
        ),
    )
    print(f"Agent created (id: {agent.id}, name: {agent.name}, version: {agent.version})")

    conversation = openai_client.conversations.create(
        items=[{"type": "message", "role": "user", "content": "What is the size of France in square miles?"}],
    )
    print(f"Created conversation with initial user message (id: {conversation.id})")

    response = openai_client.responses.create(
        conversation=conversation.id,
        extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
        input="",  # TODO: Remove 'input' once service is fixed
    )
    print(f"Response output: {response.output_text} (id: {response.id})")

    data_source_config = {"type": "azure_ai_source", "scenario": "responses"}
    testing_criteria = [
        {"type": "azure_ai_evaluator", "name": "violence_detection", "evaluator_name": "builtin.violence"}
    ]
    eval_object = openai_client.evals.create(
        name="Agent Response Evaluation",
        data_source_config=data_source_config,
        testing_criteria=testing_criteria,
    )
    print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})")

    data_source = {
        "type": "azure_ai_responses",
        "item_generation_params": {
            "type": "response_retrieval",
            "data_mapping": {"response_id": "{{item.resp_id}}"},
            "source": {"type": "file_content", "content": [{"item": {"resp_id": response.id}}]},
        },
    }

    response_eval_run = openai_client.evals.runs.create(
        eval_id=eval_object.id, name=f"Evaluation Run for Agent {agent.name}", data_source=data_source
    )
    print(f"Evaluation run created (id: {response_eval_run.id})")

    while response_eval_run.status not in ["completed", "failed"]:
        response_eval_run = openai_client.evals.runs.retrieve(run_id=response_eval_run.id, eval_id=eval_object.id)
        print(f"Waiting for eval run to complete... current status: {response_eval_run.status}")
        time.sleep(5)

    if response_eval_run.status == "completed":
        print("\n✓ Evaluation run completed successfully!")
        print(f"Result Counts: {response_eval_run.result_counts}")

        output_items = list(
            openai_client.evals.runs.output_items.list(run_id=response_eval_run.id, eval_id=eval_object.id)
        )
        print(f"\nOUTPUT ITEMS (Total: {len(output_items)})")
        print(f"{'-'*60}")
        pprint(output_items)
        print(f"{'-'*60}")
    else:
        print("\n✗ Evaluation run failed.")

    openai_client.evals.delete(eval_id=eval_object.id)
    print("Evaluation deleted")

    project_client.agents.delete(agent_name=agent.name)
    print("Agent deleted")

sdk/ai/azure-ai-projects/samples/evaluation/sample_agentic_evaluators/sample_coherence.py

Lines changed: 28 additions & 52 deletions
@@ -34,7 +34,7 @@
 from openai.types.evals.create_eval_jsonl_run_data_source_param import (
     CreateEvalJSONLRunDataSourceParam,
     SourceFileContent,
-    SourceFileContentContent
+    SourceFileContentContent,
 )
 
 
@@ -46,46 +46,36 @@ def main() -> None:
         "AZURE_AI_PROJECT_ENDPOINT"
     ] # Sample : https://<account_name>.services.ai.azure.com/api/projects/<project_name>
     model_deployment_name = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME", "") # Sample : gpt-4o-mini
-
+
     with DefaultAzureCredential() as credential:
-        with AIProjectClient(endpoint=endpoint, credential=credential, api_version="2025-11-15-preview") as project_client:
+        with AIProjectClient(
+            endpoint=endpoint, credential=credential, api_version="2025-11-15-preview"
+        ) as project_client:
             print("Creating an OpenAI client from the AI Project client")
-
+
             client = project_client.get_openai_client()
             client._custom_query = {"api-version": "2025-11-15-preview"}
-
+
             data_source_config = {
                 "type": "custom",
                 "item_schema": {
                     "type": "object",
-                    "properties": {
-                        "query": {
-                            "type": "string"
-                        },
-                        "response": {
-                            "type": "string"
-                        }
-                    },
-                    "required": []
+                    "properties": {"query": {"type": "string"}, "response": {"type": "string"}},
+                    "required": [],
                 },
-                "include_sample_schema": True
+                "include_sample_schema": True,
             }
-
+
             testing_criteria = [
                 {
                     "type": "azure_ai_evaluator",
                     "name": "coherence",
                     "evaluator_name": "builtin.coherence",
-                    "initialization_parameters": {
-                        "deployment_name": f"{model_deployment_name}"
-                    },
-                    "data_mapping": {
-                        "query": "{{item.query}}",
-                        "response": "{{item.response}}"
-                    }
+                    "initialization_parameters": {"deployment_name": f"{model_deployment_name}"},
+                    "data_mapping": {"query": "{{item.query}}", "response": "{{item.response}}"},
                 }
             ]
-
+
             print("Creating Eval Group")
             eval_object = client.evals.create(
                 name="Test Coherence Evaluator with inline data",
@@ -111,34 +101,21 @@ def main() -> None:
             eval_run_object = client.evals.runs.create(
                 eval_id=eval_object.id,
                 name="inline_data_run",
-                metadata={
-                    "team": "eval-exp",
-                    "scenario": "inline-data-v1"
-                },
+                metadata={"team": "eval-exp", "scenario": "inline-data-v1"},
                 data_source=CreateEvalJSONLRunDataSourceParam(
-                    type="jsonl",
+                    type="jsonl",
                     source=SourceFileContent(
                         type="file_content",
-                        content= [
+                        content=[
                             # Success example - coherent response
-                            SourceFileContentContent(
-                                item= {
-                                    "query": success_query,
-                                    "response": success_response
-                                }
-                            ),
+                            SourceFileContentContent(item={"query": success_query, "response": success_response}),
                             # Failure example - incoherent response
-                            SourceFileContentContent(
-                                item= {
-                                    "query": failure_query,
-                                    "response": failure_response
-                                }
-                            )
-                        ]
-                    )
-                )
+                            SourceFileContentContent(item={"query": failure_query, "response": failure_response}),
+                        ],
+                    ),
+                ),
             )
-
+
             print(f"Eval Run created")
             pprint(eval_run_object)
 
@@ -151,16 +128,15 @@ def main() -> None:
 
             while True:
                 run = client.evals.runs.retrieve(run_id=eval_run_response.id, eval_id=eval_object.id)
-                if run.status == "completed" or run.status == "failed":
-                    output_items = list(client.evals.runs.output_items.list(
-                        run_id=run.id, eval_id=eval_object.id
-                    ))
+                if run.status == "completed" or run.status == "failed":
+                    output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id))
                     pprint(output_items)
                     print(f"Eval Run Status: {run.status}")
                     print(f"Eval Run Report URL: {run.report_url}")
                     break
                 time.sleep(5)
                 print("Waiting for eval run to complete...")
-
+
+
 if __name__ == "__main__":
-    main()
+    main()
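
Because the openai package's request params here (CreateEvalJSONLRunDataSourceParam, SourceFileContent, SourceFileContentContent) are TypedDicts, the inline data source above can equally be passed as a plain dict, which is the style the two new agent samples in this commit use. A rough dict equivalent of the same payload (a sketch; the shape is copied from the typed version above):

data_source = {
    "type": "jsonl",
    "source": {
        "type": "file_content",
        "content": [
            {"item": {"query": success_query, "response": success_response}},  # coherent example
            {"item": {"query": failure_query, "response": failure_response}},  # incoherent example
        ],
    },
}
# client.evals.runs.create(eval_id=eval_object.id, name="inline_data_run", data_source=data_source)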
