Skip to content

Commit 0d7977c

Browse files
authored
add sample for trace eval (#43782)
1 parent 8241eee commit 0d7977c

File tree

1 file changed

+212
-0
lines changed

1 file changed

+212
-0
lines changed
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
# pylint: disable=line-too-long,useless-suppression
2+
# ------------------------------------
3+
# Copyright (c) Microsoft Corporation.
4+
# Licensed under the MIT License.
5+
# ------------------------------------
6+
7+
"""
8+
DESCRIPTION:
9+
Given an AIProjectClient, this sample demonstrates how to run Azure AI Evaluations
10+
against agent traces collected in Azure Application Insights. The sample fetches
11+
trace IDs for a given agent and time range, creates an evaluation group configured
12+
for trace analysis, and monitors the evaluation run until it completes.
13+
14+
USAGE:
15+
python sample_evaluations_builtin_with_traces.py
16+
17+
Before running the sample:
18+
19+
pip install "azure-ai-projects>=2.0.0b1" azure-identity azure-monitor-query python-dotenv
20+
21+
Set these environment variables with your own values:
22+
1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your
23+
Azure AI Foundry project. It has the form: https://<account_name>.services.ai.azure.com/api/projects/<project_name>.
24+
2) APPINSIGHTS_RESOURCE_ID - Required. The Azure Application Insights resource ID that stores agent traces.
25+
It has the form: /subscriptions/<subscription_id>/resourceGroups/<rg_name>/providers/Microsoft.Insights/components/<resource_name>.
26+
3) AGENT_ID - Required. The agent identifier emitted by the Azure tracing integration, used to filter traces.
27+
4) MODEL_DEPLOYMENT_NAME - Required. The Azure OpenAI deployment name to use with the built-in evaluators.
28+
5) TRACE_LOOKBACK_HOURS - Optional. Number of hours to look back when querying traces and in the evaluation run.
29+
Defaults to 1.
30+
"""
31+
32+
import os
import time
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional

from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.monitor.query import LogsQueryClient, LogsQueryStatus
from azure.ai.projects import AIProjectClient

from utils import pprint
43+
44+
# Pull configuration from a local .env file when one exists.
load_dotenv()


# Required. Form: https://<account_name>.services.ai.azure.com/api/projects/<project_name>
endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"]

# Required. Form:
# /subscriptions/<subscription_id>/resourceGroups/<rg_name>/providers/Microsoft.Insights/components/<resource_name>
appinsights_resource_id = os.environ["APPINSIGHTS_RESOURCE_ID"]

# Required. The agent identifier used to filter traces, e.g. gcp-cloud-run-agent
agent_id = os.environ["AGENT_ID"]

# Required. The model deployment used by the built-in evaluators, e.g. gpt-4o-mini
model_deployment_name = os.environ["MODEL_DEPLOYMENT_NAME"]

# Optional lookback window (hours) for the trace query and eval run; defaults to 1.
trace_query_hours = int(os.environ.get("TRACE_LOOKBACK_HOURS", "1"))
57+
58+
def _build_evaluator_config(name: str, evaluator_name: str) -> Dict[str, Any]:
59+
"""Create a standard Azure AI evaluator configuration block for trace evaluations."""
60+
return {
61+
"type": "azure_ai_evaluator",
62+
"name": name,
63+
"evaluator_name": evaluator_name,
64+
"data_mapping": {
65+
"query": "{{query}}",
66+
"response": "{{response}}",
67+
"tool_definitions": "{{tool_definitions}}",
68+
},
69+
"initialization_parameters": {
70+
"deployment_name": model_deployment_name,
71+
},
72+
}
73+
74+
75+
def get_trace_ids(
    appinsight_resource_id: str, tracked_agent_id: str, start_time: datetime, end_time: datetime
) -> List[str]:
    """
    Query Application Insights for trace IDs (operation_Id) based on agent ID and time range.

    Args:
        appinsight_resource_id: The resource ID of the Application Insights instance.
        tracked_agent_id: The agent ID to filter by.
        start_time: Start time for the query.
        end_time: End time for the query.

    Returns:
        List of distinct operation IDs (trace IDs). Empty list when the query
        errors out or does not complete successfully.
    """
    # NOTE(review): tracked_agent_id and the timestamps are interpolated directly
    # into the KQL text. Acceptable for a sample fed from trusted env vars, but
    # do not pass untrusted strings here — this client has no parameterized-query
    # mechanism for KQL.
    query = f"""
    dependencies
    | where timestamp between (datetime({start_time.isoformat()}) .. datetime({end_time.isoformat()}))
    | extend agent_id = tostring(customDimensions["gen_ai.agent.id"])
    | where agent_id == "{tracked_agent_id}"
    | distinct operation_Id
    """

    try:
        with DefaultAzureCredential() as credential:
            client = LogsQueryClient(credential)
            response = client.query_resource(
                appinsight_resource_id,
                query=query,
                timespan=None,  # Time range is specified in the query itself.
            )
    except Exception as exc:  # pylint: disable=broad-except
        # Best-effort sample: report and return an empty result rather than crash.
        print(f"Error executing query: {exc}")
        return []

    if response.status == LogsQueryStatus.SUCCESS:
        # Each row holds a single column: the distinct operation_Id.
        return [row[0] for table in response.tables for row in table.rows]

    print(f"Query failed with status: {response.status}")
    if response.partial_error:
        print(f"Partial error: {response.partial_error}")
    return []
121+
122+
123+
def main() -> None:
    """Fetch recent agent trace IDs and run built-in evaluators against them."""
    window_end = datetime.now(tz=timezone.utc)
    window_start = window_end - timedelta(hours=trace_query_hours)

    print("Querying Application Insights for trace identifiers...")
    print(f"Agent ID: {agent_id}")
    print(f"Time range: {window_start.isoformat()} to {window_end.isoformat()}")

    trace_ids = get_trace_ids(appinsights_resource_id, agent_id, window_start, window_end)
    if not trace_ids:
        # Nothing to evaluate — stop before creating any service-side objects.
        print("No trace IDs found for the provided agent and time window.")
        return

    print(f"\nFound {len(trace_ids)} trace IDs:")
    for trace_id in trace_ids:
        print(f" - {trace_id}")

    with DefaultAzureCredential() as credential, AIProjectClient(
        endpoint=endpoint,
        credential=credential,
        api_version="2025-11-15-preview",
    ) as project_client:
        openai_client = project_client.get_openai_client()

        # Evaluate traces sourced from the project's linked Application Insights.
        data_source_config = {"type": "azure_ai_source", "scenario": "traces"}
        testing_criteria = [
            _build_evaluator_config(name="intent_resolution", evaluator_name="builtin.intent_resolution"),
            _build_evaluator_config(name="task_adherence", evaluator_name="builtin.task_adherence"),
        ]

        print("\nCreating Eval Group")
        eval_group = openai_client.evals.create(
            name="agent_trace_eval_group",
            data_source_config=data_source_config,
            testing_criteria=testing_criteria,
        )
        print("Eval Group created")

        print("\nGet Eval Group by Id")
        eval_group_response = openai_client.evals.retrieve(eval_group.id)
        print("Eval Group Response:")
        pprint(eval_group_response)

        print("\nCreating Eval Run with trace IDs")
        run_name = f"agent_trace_eval_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        eval_run = openai_client.evals.runs.create(
            eval_id=eval_group.id,
            name=run_name,
            metadata={
                "agent_id": agent_id,
                "start_time": window_start.isoformat(),
                "end_time": window_end.isoformat(),
            },
            data_source={
                "type": "azure_ai_traces",
                "trace_ids": trace_ids,
                "lookback_hours": trace_query_hours,
            },
        )
        print("Eval Run created")
        pprint(eval_run)

        # Poll until the run reaches a terminal state.
        print("\nMonitoring Eval Run status...")
        while True:
            run = openai_client.evals.runs.retrieve(run_id=eval_run.id, eval_id=eval_group.id)
            print(f"Status: {run.status}")

            if run.status in {"completed", "failed", "canceled"}:
                print("\nEval Run finished!")
                print("Final Eval Run Response:")
                pprint(run)
                break

            time.sleep(5)
            print("Waiting for eval run to complete...")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)